diff --git a/.gitattributes b/.gitattributes
index f66c8a4e4763dfb33176865a936680818ecad3c4..426cf6ad7788d95a8ee7a3402721dc531616debb 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -123,3 +123,7 @@ segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadab
segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/Release/0_2_reloadable4 filter=lfs diff=lfs merge=lfs -text
segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5 filter=lfs diff=lfs merge=lfs -text
segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6 filter=lfs diff=lfs merge=lfs -text
+segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable2/Release/0_2_reloadable2 filter=lfs diff=lfs merge=lfs -text
+segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7 filter=lfs diff=lfs merge=lfs -text
+segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8 filter=lfs diff=lfs merge=lfs -text
+segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9 filter=lfs diff=lfs merge=lfs -text
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable14/elf_ctrl_pkt.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable14/elf_ctrl_pkt.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b0fd7517845d4d46200c8a4bb4aaa4600a88d91
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable14/elf_ctrl_pkt.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78a21fd29dd4052c61ba6330d932e338928013aaab2aadd53c9be702b2842139
+size 15504
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable2/Release/0_2_reloadable2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable2/Release/0_2_reloadable2
new file mode 100644
index 0000000000000000000000000000000000000000..624f51485e147e551630b22e0da6c981ff0d6716
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable2/Release/0_2_reloadable2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a5a9df05a6c75bd80221e66433224379fdcc9b6f2a108f6df52dbe834b70c02
+size 2310932
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/elf_ctrl_pkt.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/elf_ctrl_pkt.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b0fd7517845d4d46200c8a4bb4aaa4600a88d91
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/elf_ctrl_pkt.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78a21fd29dd4052c61ba6330d932e338928013aaab2aadd53c9be702b2842139
+size 15504
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7
new file mode 100644
index 0000000000000000000000000000000000000000..11d814fe2d988f2e9109399f6483a3961d3ca538
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:706ee15488c183fef5fe4a9eff5216803f39e37c43d42f1e192ed887923f9497
+size 4369168
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8
new file mode 100644
index 0000000000000000000000000000000000000000..624f51485e147e551630b22e0da6c981ff0d6716
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a5a9df05a6c75bd80221e66433224379fdcc9b6f2a108f6df52dbe834b70c02
+size 2310932
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/elf_ctrl_pkt.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/elf_ctrl_pkt.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b0fd7517845d4d46200c8a4bb4aaa4600a88d91
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/elf_ctrl_pkt.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78a21fd29dd4052c61ba6330d932e338928013aaab2aadd53c9be702b2842139
+size 15504
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9
new file mode 100644
index 0000000000000000000000000000000000000000..be4270b4694a6a9d1af3baef0b3bf5a7c96cc45f
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a70af264ad094ffa92c818205bb69395912b10f6e59c002d01ed8d23592cb43c
+size 4336352
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/elf_ctrl_pkt.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/elf_ctrl_pkt.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ad1f6a67dd900f40e6c18d42540c4cc1308c85ba
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/elf_ctrl_pkt.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84953abe58446cba2c429d31ae9375ebb95b4c98f00663fb32e1493c108dbaa3
+size 19344
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/scripts/3_3_reloadable10.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/scripts/3_3_reloadable10.prx
new file mode 100644
index 0000000000000000000000000000000000000000..92de2a57ca244fb472163908fbe822a17bc79db0
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/scripts/3_3_reloadable10.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/src/3_3_reloadable10.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/src/3_3_reloadable10.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable10/src/3_3_reloadable10.cc
@@ -0,0 +1,41 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+
+// Declare Kernel objects and external arrays
+
+
+void _b961_wrapper(void* args[])
+{
+ superkernel_reduce_mean_c8(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+using UniformKernelFunc = void (*)(void **);
+
+static UniformKernelFunc g_uniformKernelFuncs[1] = {
+ _b961_wrapper
+};
+
+__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut)
+{
+ uint32 idx = 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+
+ (*(g_uniformKernelFuncs[kernelId]))(args);
+
+ idx = 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+}
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.calltree
new file mode 100644
index 0000000000000000000000000000000000000000..a9aa937024e08d6db65ac17b5f174a0a1241e359
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.calltree
@@ -0,0 +1,108 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:20 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+
+// Release: ipp V-2024.06-TGT-241219
+
+_Z13kernelWrapperPPvjjjj
+ _Z13_b896_wrapperPPv (referenced text)
+ _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ _Z13_b901_wrapperPPv (referenced text)
+ _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _Z13_b906_wrapperPPv (referenced text)
+ _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ _Z13_b881_wrapperPPv (referenced text)
+ _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ _Z13_b891_wrapperPPv (referenced text)
+ _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ _Z13_b924_wrapperPPv (referenced text)
+ _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+ _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (*)
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*)
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*)
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (*)
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*)
+ _Z13_b919_wrapperPPv (referenced text)
+ _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Call tree stack and functions sizes:
+
+stack stack stack call func func function name
+ desc level level desc
+----- ----- ----- ----- ----- ----- --------------------------------------------------------------
+ 64 320 0 0 390 13150 _Z13kernelWrapperPPvjjjj
+ 0 192 1 1 36 4714 _Z13_b896_wrapperPPv
+ 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ 0 192 1 1 32 1252 _Z13_b901_wrapperPPv
+ 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ 0 64 1 1 32 862 _Z13_b906_wrapperPPv
+ 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ 0 256 1 1 32 1394 _Z13_b881_wrapperPPv
+ 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ 0 128 1 1 36 1092 _Z13_b891_wrapperPPv
+ 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ 0 192 1 1 40 6494 _Z13_b924_wrapperPPv
+ 64 192 1 2 1126 6454 _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+ 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*)
+ 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+ 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*)
+ 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ 0 192 1 1 36 2050 _Z13_b919_wrapperPPv
+ 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Maximum call level : 5
+Maximum stack level: 4
+Maximum stack size : 320
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmic2
new file mode 100644
index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmic2
@@ -0,0 +1,19187 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable5.cc" 94 first
+.src_ref 0 "0_0_reloadable5.cc" 96 60 first
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 94
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "00000000" // /* MW 5 */
+ 6942 "10101100" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "00000000" // /* MW 5 */
+ 7224 "11001100" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "10000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11100000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11100000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11009 "01011000" // /* MW 9 */
+ 11010 "00000000" // /* MW 8 */
+ 11011 "00001000" // /* MW 7 */
+ 11012 "00001011" // /* MW 6 */
+ 11013 "00100000" // /* MW 5 */
+ 11014 "00001000" // /* MW 4 */
+ 11015 "11010000" // /* MW 3 */
+ 11016 "10000101" // /* MW 2 */
+ 11017 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "00000001" // /* MW 3 */
+ 11020 "10000000" // /* MW 2 */
+ 11021 "00010111" // /* MW 1 */
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11025 "00000000" // /* MW 1 */
+ 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11027 "00000000" // /* MW 1 */
+ 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11029 "00000000" // /* MW 1 */
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11033 "00101001" // /* MW 3 */
+ 11034 "00011100" // /* MW 2 */
+ 11035 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11037 "00101110" // /* MW 3 */
+ 11038 "00011100" // /* MW 2 */
+ 11039 "00000001" // /* MW 1 */
+ 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11041 "00000000" // /* MW 1 */
+ 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11043 "00000000" // /* MW 1 */
+ 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11045 "00000000" // /* MW 1 */
+ 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11047 "00000000" // /* MW 1 */
+ 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11051 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11053 "00101001" // /* MW 3 */
+ 11054 "00011100" // /* MW 2 */
+ 11055 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11057 "00101110" // /* MW 3 */
+ 11058 "00000100" // /* MW 2 */
+ 11059 "00000001" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+ 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11063 "00000000" // /* MW 1 */
+ 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11065 "00000000" // /* MW 1 */
+ 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11067 "00000000" // /* MW 1 */
+ 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11069 "00000000" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00101001" // /* MW 3 */
+ 11074 "00011100" // /* MW 2 */
+ 11075 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11077 "01110110" // /* MW 3 */
+ 11078 "00010100" // /* MW 2 */
+ 11079 "00000001" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+ 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11083 "00000000" // /* MW 1 */
+ 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11085 "00000000" // /* MW 1 */
+ 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11087 "00000000" // /* MW 1 */
+ 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11089 "00000000" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "01110001" // /* MW 3 */
+ 11094 "01001100" // /* MW 2 */
+ 11095 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11097 "00010111" // /* MW 3 */
+ 11098 "00000100" // /* MW 2 */
+ 11099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11101 "00000000" // /* MW 3 */
+ 11102 "00101000" // /* MW 2 */
+ 11103 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11105 "00000000" // /* MW 5 */
+ 11106 "10111110" // /* MW 4 */
+ 11107 "11110000" // /* MW 3 */
+ 11108 "00000000" // /* MW 2 */
+ 11109 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11111 "00010100" // /* MW 3 */
+ 11112 "11000010" // /* MW 2 */
+ 11113 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11115 "00100111" // /* MW 3 */
+ 11116 "01110110" // /* MW 2 */
+ 11117 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "10000010" // /* MW 3 */
+ 11120 "00000001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11123 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11137 "00000001" // /* MW 5 */
+ 11138 "00000000" // /* MW 4 */
+ 11139 "00000000" // /* MW 3 */
+ 11140 "00001000" // /* MW 2 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11143 "00111101" // /* MW 3 */
+ 11144 "11111000" // /* MW 2 */
+ 11145 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11147 "00000001" // /* MW 5 */
+ 11148 "00000000" // /* MW 4 */
+ 11149 "10000000" // /* MW 3 */
+ 11150 "00010101" // /* MW 2 */
+ 11151 "00000000" // /* MW 1 */
+.delay_slot
+ 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11153 "10100000" // /* MW 3 */
+ 11154 "00010111" // /* MW 2 */
+ 11155 "00011000" // /* MW 1 */
+.delay_slot
+ 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11157 "00010101" // /* MW 3 */
+ 11158 "11111100" // /* MW 2 */
+ 11159 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.delay_slot
+ 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11161 "11000000" // /* MW 3 */
+ 11162 "11010000" // /* MW 2 */
+ 11163 "00011011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "00001000" // /* MW 9 */
+ 11170 "11000100" // /* MW 8 */
+ 11171 "00110011" // /* MW 7 */
+ 11172 "01101000" // /* MW 6 */
+ 11173 "00000000" // /* MW 5 */
+ 11174 "00000001" // /* MW 4 */
+ 11175 "00100000" // /* MW 3 */
+ 11176 "00000111" // /* MW 2 */
+ 11177 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11179 "01011000" // /* MW 9 */
+ 11180 "11111101" // /* MW 8 */
+ 11181 "00000111" // /* MW 7 */
+ 11182 "00001000" // /* MW 6 */
+ 11183 "10000000" // /* MW 5 */
+ 11184 "00000001" // /* MW 4 */
+ 11185 "10000000" // /* MW 3 */
+ 11186 "11100010" // /* MW 2 */
+ 11187 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11189 "00000001" // /* MW 9 */
+ 11190 "10100000" // /* MW 8 */
+ 11191 "00000111" // /* MW 7 */
+ 11192 "10000000" // /* MW 6 */
+ 11193 "00010001" // /* MW 5 */
+ 11194 "00001010" // /* MW 4 */
+ 11195 "00100000" // /* MW 3 */
+ 11196 "10111110" // /* MW 2 */
+ 11197 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11199 "01001010" // /* MW 3 */
+ 11200 "00000110" // /* MW 2 */
+ 11201 "00000000" // /* MW 1 */
+ 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11205 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11207 "00010111" // /* MW 3 */
+ 11208 "00000010" // /* MW 2 */
+ 11209 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11211 "00000000" // /* MW 3 */
+ 11212 "00101000" // /* MW 2 */
+ 11213 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11215 "00000101" // /* MW 3 */
+ 11216 "00100010" // /* MW 2 */
+ 11217 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "00000000" // /* MW 3 */
+ 11222 "11111000" // /* MW 2 */
+ 11223 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "00100111" // /* MW 3 */
+ 11226 "01110111" // /* MW 2 */
+ 11227 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11229 "10000010" // /* MW 3 */
+ 11230 "00100001" // /* MW 2 */
+ 11231 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11233 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_shared.h" 227 first
+.src_ref 3 "elementwise_binary_shared.h" 232 8 first
+.tail_call
+.function_start
+ 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 11249 "00000000" // /* MW 5 */
+ 11250 "00000000" // /* MW 4 */
+ 11251 "00101000" // /* MW 3 */
+ 11252 "00010011" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11259 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 11263 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11265 "00000001" // /* MW 5 */
+ 11266 "00100001" // /* MW 4 */
+ 11267 "00000000" // /* MW 3 */
+ 11268 "00000000" // /* MW 2 */
+ 11269 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "11000000" // /* MW 3 */
+ 11272 "01010000" // /* MW 2 */
+ 11273 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11275 "10010000" // /* MW 3 */
+ 11276 "01100000" // /* MW 2 */
+ 11277 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "00010001" // /* MW 3 */
+ 11280 "00000100" // /* MW 2 */
+ 11281 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010001" // /* MW 3 */
+ 11284 "00010100" // /* MW 2 */
+ 11285 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11287 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00101110" // /* MW 3 */
+ 11298 "00011100" // /* MW 2 */
+ 11299 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11301 "00000001" // /* MW 5 */
+ 11302 "00000000" // /* MW 4 */
+ 11303 "00000000" // /* MW 3 */
+ 11304 "00001000" // /* MW 2 */
+ 11305 "00000000" // /* MW 1 */
+ 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11307 "00111101" // /* MW 3 */
+ 11308 "11111100" // /* MW 2 */
+ 11309 "00001111" // /* MW 1 */
+ 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11311 "00000000" // /* MW 1 */
+ 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11313 "00000000" // /* MW 1 */
+ 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11315 "00000000" // /* MW 1 */
+ 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11317 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11319 "00101001" // /* MW 3 */
+ 11320 "00011100" // /* MW 2 */
+ 11321 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11323 "00101110" // /* MW 3 */
+ 11324 "00011100" // /* MW 2 */
+ 11325 "00000001" // /* MW 1 */
+ 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11327 "00000000" // /* MW 1 */
+ 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11329 "00000000" // /* MW 1 */
+ 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11331 "00000000" // /* MW 1 */
+ 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11333 "00000000" // /* MW 1 */
+ 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11335 "00000000" // /* MW 1 */
+ 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11337 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11339 "00101001" // /* MW 3 */
+ 11340 "00011100" // /* MW 2 */
+ 11341 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11343 "00101110" // /* MW 3 */
+ 11344 "00000100" // /* MW 2 */
+ 11345 "00000001" // /* MW 1 */
+ 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11347 "00000000" // /* MW 1 */
+ 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11349 "00000000" // /* MW 1 */
+ 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11351 "00000000" // /* MW 1 */
+ 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11353 "00000000" // /* MW 1 */
+ 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11355 "00000000" // /* MW 1 */
+ 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11357 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11359 "00101001" // /* MW 3 */
+ 11360 "00011100" // /* MW 2 */
+ 11361 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11363 "00101110" // /* MW 3 */
+ 11364 "00010100" // /* MW 2 */
+ 11365 "00000001" // /* MW 1 */
+ 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11367 "00000000" // /* MW 1 */
+ 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11369 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */
+ 11371 "00000001" // /* MW 5 */
+ 11372 "00000000" // /* MW 4 */
+ 11373 "00000000" // /* MW 3 */
+ 11374 "00010110" // /* MW 2 */
+ 11375 "00000000" // /* MW 1 */
+.delay_slot
+ 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11377 "10011101" // /* MW 3 */
+ 11378 "11111011" // /* MW 2 */
+ 11379 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11385 "00101001" // /* MW 3 */
+ 11386 "11011100" // /* MW 2 */
+ 11387 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11389 "11000000" // /* MW 3 */
+ 11390 "01100000" // /* MW 2 */
+ 11391 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11393 "00111001" // /* MW 3 */
+ 11394 "11111100" // /* MW 2 */
+ 11395 "00000111" // /* MW 1 */
+ 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11397 "00000000" // /* MW 1 */
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11407 "10011001" // /* MW 3 */
+ 11408 "11111011" // /* MW 2 */
+ 11409 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11411 "00000000" // /* MW 3 */
+ 11412 "00101000" // /* MW 2 */
+ 11413 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11419 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11421 "00000001" // /* MW 3 */
+ 11422 "00100000" // /* MW 2 */
+ 11423 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11425 "01110001" // /* MW 9 */
+ 11426 "00000000" // /* MW 8 */
+ 11427 "00000000" // /* MW 7 */
+ 11428 "00000000" // /* MW 6 */
+ 11429 "11111110" // /* MW 5 */
+ 11430 "00111111" // /* MW 4 */
+ 11431 "00110000" // /* MW 3 */
+ 11432 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11433 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11441 "00010000" // /* MW 9 */
+ 11442 "10110000" // /* MW 8 */
+ 11443 "01111110" // /* MW 7 */
+ 11444 "00001000" // /* MW 6 */
+ 11445 "00000000" // /* MW 5 */
+ 11446 "00000000" // /* MW 4 */
+ 11447 "10000000" // /* MW 3 */
+ 11448 "00000000" // /* MW 2 */
+ 11449 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11451 "00010000" // /* MW 9 */
+ 11452 "10111000" // /* MW 8 */
+ 11453 "10111110" // /* MW 7 */
+ 11454 "00001001" // /* MW 6 */
+ 11455 "00000000" // /* MW 5 */
+ 11456 "00000000" // /* MW 4 */
+ 11457 "11010000" // /* MW 3 */
+ 11458 "00001110" // /* MW 2 */
+ 11459 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11461 "01011000" // /* MW 9 */
+ 11462 "00111100" // /* MW 8 */
+ 11463 "00001011" // /* MW 7 */
+ 11464 "01001000" // /* MW 6 */
+ 11465 "00010111" // /* MW 5 */
+ 11466 "00111110" // /* MW 4 */
+ 11467 "11010000" // /* MW 3 */
+ 11468 "10010000" // /* MW 2 */
+ 11469 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11471 "00010000" // /* MW 9 */
+ 11472 "00110100" // /* MW 8 */
+ 11473 "00110010" // /* MW 7 */
+ 11474 "11110010" // /* MW 6 */
+ 11475 "00000001" // /* MW 5 */
+ 11476 "00000000" // /* MW 4 */
+ 11477 "11010000" // /* MW 3 */
+ 11478 "10000000" // /* MW 2 */
+ 11479 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11481 "01000010" // /* MW 3 */
+ 11482 "00000100" // /* MW 2 */
+ 11483 "00000100" // /* MW 1 */
+ 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11485 "00000000" // /* MW 1 */
+ 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11487 "00000000" // /* MW 1 */
+ 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11489 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11491 "00011101" // /* MW 3 */
+ 11492 "11000010" // /* MW 2 */
+ 11493 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11111001" // /* MW 5 */
+ 11496 "11100001" // /* MW 4 */
+ 11497 "10001010" // /* MW 3 */
+ 11498 "00001110" // /* MW 2 */
+ 11499 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "01101000" // /* MW 5 */
+ 11502 "01010000" // /* MW 4 */
+ 11503 "01110000" // /* MW 3 */
+ 11504 "00010011" // /* MW 2 */
+ 11505 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11507 "10000000" // /* MW 7 */
+ 11508 "10111010" // /* MW 6 */
+ 11509 "11101000" // /* MW 5 */
+ 11510 "01010000" // /* MW 4 */
+ 11511 "01110000" // /* MW 3 */
+ 11512 "00011011" // /* MW 2 */
+ 11513 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11515 "01101000" // /* MW 5 */
+ 11516 "01010000" // /* MW 4 */
+ 11517 "01110000" // /* MW 3 */
+ 11518 "00010011" // /* MW 2 */
+ 11519 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11521 "11101000" // /* MW 5 */
+ 11522 "01010000" // /* MW 4 */
+ 11523 "01110000" // /* MW 3 */
+ 11524 "00011011" // /* MW 2 */
+ 11525 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11527 "10011011" // /* MW 3 */
+ 11528 "00001000" // /* MW 2 */
+ 11529 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11531 "01101000" // /* MW 5 */
+ 11532 "01010000" // /* MW 4 */
+ 11533 "01110000" // /* MW 3 */
+ 11534 "00011011" // /* MW 2 */
+ 11535 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11537 "11101000" // /* MW 5 */
+ 11538 "01010000" // /* MW 4 */
+ 11539 "01110000" // /* MW 3 */
+ 11540 "00010011" // /* MW 2 */
+ 11541 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11543 "01000001" // /* MW 9 */
+ 11544 "11100010" // /* MW 8 */
+ 11545 "00000000" // /* MW 7 */
+ 11546 "00011101" // /* MW 6 */
+ 11547 "00110100" // /* MW 5 */
+ 11548 "00101000" // /* MW 4 */
+ 11549 "01110000" // /* MW 3 */
+ 11550 "00011011" // /* MW 2 */
+ 11551 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11553 "01100001" // /* MW 9 */
+ 11554 "11100000" // /* MW 8 */
+ 11555 "00000001" // /* MW 7 */
+ 11556 "00011101" // /* MW 6 */
+ 11557 "01110100" // /* MW 5 */
+ 11558 "00101000" // /* MW 4 */
+ 11559 "01110000" // /* MW 3 */
+ 11560 "00010011" // /* MW 2 */
+ 11561 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "01000001" // /* MW 9 */
+ 11564 "11100010" // /* MW 8 */
+ 11565 "00000000" // /* MW 7 */
+ 11566 "00011101" // /* MW 6 */
+ 11567 "00110100" // /* MW 5 */
+ 11568 "00101000" // /* MW 4 */
+ 11569 "01110000" // /* MW 3 */
+ 11570 "00011011" // /* MW 2 */
+ 11571 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "01100001" // /* MW 9 */
+ 11574 "11100000" // /* MW 8 */
+ 11575 "00000001" // /* MW 7 */
+ 11576 "00011101" // /* MW 6 */
+ 11577 "01110100" // /* MW 5 */
+ 11578 "00101000" // /* MW 4 */
+ 11579 "01110000" // /* MW 3 */
+ 11580 "00010011" // /* MW 2 */
+ 11581 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "01000001" // /* MW 9 */
+ 11584 "11100010" // /* MW 8 */
+ 11585 "00000000" // /* MW 7 */
+ 11586 "00011101" // /* MW 6 */
+ 11587 "00110100" // /* MW 5 */
+ 11588 "00101000" // /* MW 4 */
+ 11589 "01110000" // /* MW 3 */
+ 11590 "00011011" // /* MW 2 */
+ 11591 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11593 "01100001" // /* MW 9 */
+ 11594 "11100000" // /* MW 8 */
+ 11595 "00000001" // /* MW 7 */
+ 11596 "00011101" // /* MW 6 */
+ 11597 "01110100" // /* MW 5 */
+ 11598 "00101000" // /* MW 4 */
+ 11599 "01110000" // /* MW 3 */
+ 11600 "00010011" // /* MW 2 */
+ 11601 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11603 "01000001" // /* MW 13 */
+ 11604 "11100010" // /* MW 12 */
+ 11605 "00000000" // /* MW 11 */
+ 11606 "10001100" // /* MW 10 */
+ 11607 "01110000" // /* MW 9 */
+ 11608 "00001000" // /* MW 8 */
+ 11609 "00000000" // /* MW 7 */
+ 11610 "00000000" // /* MW 6 */
+ 11611 "01101000" // /* MW 5 */
+ 11612 "01010000" // /* MW 4 */
+ 11613 "01110000" // /* MW 3 */
+ 11614 "00011011" // /* MW 2 */
+ 11615 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11617 "00000011" // /* MW 15 */
+ 11618 "00001111" // /* MW 14 */
+ 11619 "01111000" // /* MW 13 */
+ 11620 "10100101" // /* MW 12 */
+ 11621 "00000001" // /* MW 11 */
+ 11622 "00000000" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "10100011" // /* MW 7 */
+ 11626 "00011100" // /* MW 6 */
+ 11627 "11101010" // /* MW 5 */
+ 11628 "01010000" // /* MW 4 */
+ 11629 "01110000" // /* MW 3 */
+ 11630 "00010011" // /* MW 2 */
+ 11631 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11633 "00010010" // /* MW 15 */
+ 11634 "00000111" // /* MW 14 */
+ 11635 "01111000" // /* MW 13 */
+ 11636 "10100101" // /* MW 12 */
+ 11637 "00000001" // /* MW 11 */
+ 11638 "00000000" // /* MW 10 */
+ 11639 "00000000" // /* MW 9 */
+ 11640 "00000000" // /* MW 8 */
+ 11641 "00100011" // /* MW 7 */
+ 11642 "00011100" // /* MW 6 */
+ 11643 "01101010" // /* MW 5 */
+ 11644 "01010000" // /* MW 4 */
+ 11645 "01110000" // /* MW 3 */
+ 11646 "00011011" // /* MW 2 */
+ 11647 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11649 "01100001" // /* MW 7 */
+ 11650 "11100000" // /* MW 6 */
+ 11651 "00000001" // /* MW 5 */
+ 11652 "00000010" // /* MW 4 */
+ 11653 "01100000" // /* MW 3 */
+ 11654 "10010100" // /* MW 2 */
+ 11655 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11657 "01000001" // /* MW 7 */
+ 11658 "11100010" // /* MW 6 */
+ 11659 "00000000" // /* MW 5 */
+ 11660 "00000010" // /* MW 4 */
+ 11661 "01100000" // /* MW 3 */
+ 11662 "10000100" // /* MW 2 */
+ 11663 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11665 "01100001" // /* MW 7 */
+ 11666 "11100000" // /* MW 6 */
+ 11667 "00000001" // /* MW 5 */
+ 11668 "00000010" // /* MW 4 */
+ 11669 "01100000" // /* MW 3 */
+ 11670 "10010100" // /* MW 2 */
+ 11671 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11673 "01000001" // /* MW 7 */
+ 11674 "11100010" // /* MW 6 */
+ 11675 "00000000" // /* MW 5 */
+ 11676 "00000010" // /* MW 4 */
+ 11677 "01100000" // /* MW 3 */
+ 11678 "10000100" // /* MW 2 */
+ 11679 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11681 "01100001" // /* MW 7 */
+ 11682 "11100000" // /* MW 6 */
+ 11683 "00000001" // /* MW 5 */
+ 11684 "00000010" // /* MW 4 */
+ 11685 "01100000" // /* MW 3 */
+ 11686 "10010100" // /* MW 2 */
+ 11687 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11689 "01000001" // /* MW 7 */
+ 11690 "11100010" // /* MW 6 */
+ 11691 "00000000" // /* MW 5 */
+ 11692 "00000010" // /* MW 4 */
+ 11693 "01100000" // /* MW 3 */
+ 11694 "10000100" // /* MW 2 */
+ 11695 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11697 "01100001" // /* MW 7 */
+ 11698 "11100000" // /* MW 6 */
+ 11699 "00000001" // /* MW 5 */
+ 11700 "00000010" // /* MW 4 */
+ 11701 "01100000" // /* MW 3 */
+ 11702 "10010100" // /* MW 2 */
+ 11703 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11705 "00100011" // /* MW 3 */
+ 11706 "00011100" // /* MW 2 */
+ 11707 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11709 "00000000" // /* MW 5 */
+ 11710 "01010000" // /* MW 4 */
+ 11711 "01100000" // /* MW 3 */
+ 11712 "10010100" // /* MW 2 */
+ 11713 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00100011" // /* MW 3 */
+ 11716 "00011100" // /* MW 2 */
+ 11717 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "10100011" // /* MW 3 */
+ 11720 "00011100" // /* MW 2 */
+ 11721 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "00100011" // /* MW 3 */
+ 11724 "00011100" // /* MW 2 */
+ 11725 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11727 "10100011" // /* MW 3 */
+ 11728 "00011100" // /* MW 2 */
+ 11729 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11731 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11745 "10000000" // /* MW 5 */
+ 11746 "11001000" // /* MW 4 */
+ 11747 "11001000" // /* MW 3 */
+ 11748 "00000111" // /* MW 2 */
+ 11749 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11751 "11000001" // /* MW 5 */
+ 11752 "10110101" // /* MW 4 */
+ 11753 "11011000" // /* MW 3 */
+ 11754 "11000010" // /* MW 2 */
+ 11755 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11757 "00000001" // /* MW 5 */
+ 11758 "00000000" // /* MW 4 */
+ 11759 "00000000" // /* MW 3 */
+ 11760 "00001000" // /* MW 2 */
+ 11761 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11763 "01111001" // /* MW 9 */
+ 11764 "01100000" // /* MW 8 */
+ 11765 "11001010" // /* MW 7 */
+ 11766 "10000001" // /* MW 6 */
+ 11767 "00010100" // /* MW 5 */
+ 11768 "00100011" // /* MW 4 */
+ 11769 "10110000" // /* MW 3 */
+ 11770 "00111010" // /* MW 2 */
+ 11771 "11111111" // /* MW 1 */
+ 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11773 "01110000" // /* MW 7 */
+ 11774 "11010000" // /* MW 6 */
+ 11775 "00001011" // /* MW 5 */
+ 11776 "00000000" // /* MW 4 */
+ 11777 "10110000" // /* MW 3 */
+ 11778 "10000011" // /* MW 2 */
+ 11779 "11111101" // /* MW 1 */
+ 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11781 "00010101" // /* MW 3 */
+ 11782 "11111100" // /* MW 2 */
+ 11783 "00001111" // /* MW 1 */
+ 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11785 "00111101" // /* MW 3 */
+ 11786 "11110000" // /* MW 2 */
+ 11787 "00001111" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */
+ 11791 "00000001" // /* MW 5 */
+ 11792 "01000000" // /* MW 4 */
+ 11793 "01010000" // /* MW 3 */
+ 11794 "00010111" // /* MW 2 */
+ 11795 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "11111011" // /* MW 3 */
+ 11798 "01100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11801 "10100000" // /* MW 5 */
+ 11802 "11001000" // /* MW 4 */
+ 11803 "11000100" // /* MW 3 */
+ 11804 "00000111" // /* MW 2 */
+ 11805 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11807 "01110000" // /* MW 7 */
+ 11808 "01100000" // /* MW 6 */
+ 11809 "00110111" // /* MW 5 */
+ 11810 "00000001" // /* MW 4 */
+ 11811 "00110000" // /* MW 3 */
+ 11812 "11000110" // /* MW 2 */
+ 11813 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11815 "11000000" // /* MW 3 */
+ 11816 "11010110" // /* MW 2 */
+ 11817 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11819 "00010001" // /* MW 9 */
+ 11820 "11000000" // /* MW 8 */
+ 11821 "10110010" // /* MW 7 */
+ 11822 "11110011" // /* MW 6 */
+ 11823 "00000001" // /* MW 5 */
+ 11824 "00000000" // /* MW 4 */
+ 11825 "10110000" // /* MW 3 */
+ 11826 "10100011" // /* MW 2 */
+ 11827 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11829 "00010001" // /* MW 9 */
+ 11830 "00110100" // /* MW 8 */
+ 11831 "00110010" // /* MW 7 */
+ 11832 "11110001" // /* MW 6 */
+ 11833 "00000001" // /* MW 5 */
+ 11834 "00000000" // /* MW 4 */
+ 11835 "01100000" // /* MW 3 */
+ 11836 "10010001" // /* MW 2 */
+ 11837 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11839 "00010000" // /* MW 9 */
+ 11840 "00110010" // /* MW 8 */
+ 11841 "00110010" // /* MW 7 */
+ 11842 "11110001" // /* MW 6 */
+ 11843 "00000001" // /* MW 5 */
+ 11844 "00000000" // /* MW 4 */
+ 11845 "11100000" // /* MW 3 */
+ 11846 "11000000" // /* MW 2 */
+ 11847 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11849 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 11851 "00000001" // /* MW 5 */
+ 11852 "00000000" // /* MW 4 */
+ 11853 "00010000" // /* MW 3 */
+ 11854 "00010110" // /* MW 2 */
+ 11855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "00110001" // /* MW 3 */
+ 11862 "00100000" // /* MW 2 */
+ 11863 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11865 "00000101" // /* MW 3 */
+ 11866 "00100000" // /* MW 2 */
+ 11867 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11869 "00010001" // /* MW 3 */
+ 11870 "00000110" // /* MW 2 */
+ 11871 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11873 "00010000" // /* MW 9 */
+ 11874 "00101000" // /* MW 8 */
+ 11875 "10110010" // /* MW 7 */
+ 11876 "11110000" // /* MW 6 */
+ 11877 "00000001" // /* MW 5 */
+ 11878 "00000000" // /* MW 4 */
+ 11879 "11010000" // /* MW 3 */
+ 11880 "11000010" // /* MW 2 */
+ 11881 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11883 "00010000" // /* MW 9 */
+ 11884 "00101010" // /* MW 8 */
+ 11885 "10110010" // /* MW 7 */
+ 11886 "11110001" // /* MW 6 */
+ 11887 "00000001" // /* MW 5 */
+ 11888 "00000000" // /* MW 4 */
+ 11889 "11010000" // /* MW 3 */
+ 11890 "11000110" // /* MW 2 */
+ 11891 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11893 "00010000" // /* MW 9 */
+ 11894 "00101110" // /* MW 8 */
+ 11895 "10110010" // /* MW 7 */
+ 11896 "11110000" // /* MW 6 */
+ 11897 "00000001" // /* MW 5 */
+ 11898 "00000000" // /* MW 4 */
+ 11899 "01010000" // /* MW 3 */
+ 11900 "11001011" // /* MW 2 */
+ 11901 "11101010" // /* MW 1 */
+ 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11903 "00000000" // /* MW 1 */
+ 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11905 "00000000" // /* MW 1 */
+ 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11907 "00000000" // /* MW 1 */
+ 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000000" // /* MW 4 */
+ 11911 "01011000" // /* MW 3 */
+ 11912 "00010111" // /* MW 2 */
+ 11913 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11000100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "00001111" // /* MW 3 */
+ 11922 "01100001" // /* MW 2 */
+ 11923 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "01010001" // /* MW 3 */
+ 11926 "00000110" // /* MW 2 */
+ 11927 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "00010001" // /* MW 3 */
+ 11930 "00000110" // /* MW 2 */
+ 11931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00010001" // /* MW 3 */
+ 11934 "00000110" // /* MW 2 */
+ 11935 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11937 "10101000" // /* MW 5 */
+ 11938 "11001000" // /* MW 4 */
+ 11939 "11000110" // /* MW 3 */
+ 11940 "00000111" // /* MW 2 */
+ 11941 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11943 "00010000" // /* MW 9 */
+ 11944 "00101110" // /* MW 8 */
+ 11945 "10110010" // /* MW 7 */
+ 11946 "11110000" // /* MW 6 */
+ 11947 "00000001" // /* MW 5 */
+ 11948 "00000000" // /* MW 4 */
+ 11949 "11110000" // /* MW 3 */
+ 11950 "00101100" // /* MW 2 */
+ 11951 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "10000110" // /* MW 3 */
+ 11954 "01100111" // /* MW 2 */
+ 11955 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11957 "00010000" // /* MW 9 */
+ 11958 "00100000" // /* MW 8 */
+ 11959 "00110010" // /* MW 7 */
+ 11960 "11110001" // /* MW 6 */
+ 11961 "00000001" // /* MW 5 */
+ 11962 "00000000" // /* MW 4 */
+ 11963 "11010000" // /* MW 3 */
+ 11964 "11101110" // /* MW 2 */
+ 11965 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11967 "00010110" // /* MW 3 */
+ 11968 "11111110" // /* MW 2 */
+ 11969 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11971 "00110110" // /* MW 3 */
+ 11972 "11111110" // /* MW 2 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "01010110" // /* MW 3 */
+ 11976 "00000110" // /* MW 2 */
+ 11977 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11979 "01110110" // /* MW 3 */
+ 11980 "01000110" // /* MW 2 */
+ 11981 "00000000" // /* MW 1 */
+ 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11983 "00000000" // /* MW 1 */
+ 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11985 "00000000" // /* MW 1 */
+ 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11987 "00000000" // /* MW 1 */
+ 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11989 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11991 "00000010" // /* MW 3 */
+ 11992 "01100001" // /* MW 2 */
+ 11993 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11995 "00001110" // /* MW 5 */
+ 11996 "01000000" // /* MW 4 */
+ 11997 "00111001" // /* MW 3 */
+ 11998 "11000010" // /* MW 2 */
+ 11999 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12001 "00010001" // /* MW 3 */
+ 12002 "00000110" // /* MW 2 */
+ 12003 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "11111101" // /* MW 3 */
+ 12006 "11100000" // /* MW 2 */
+ 12007 "00010111" // /* MW 1 */
+ 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12009 "00000000" // /* MW 1 */
+ 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12011 "00000000" // /* MW 1 */
+ 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12013 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12015 "00001000" // /* MW 3 */
+ 12016 "11010011" // /* MW 2 */
+ 12017 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12019 "00000110" // /* MW 3 */
+ 12020 "01100111" // /* MW 2 */
+ 12021 "00011010" // /* MW 1 */
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12027 "01110110" // /* MW 3 */
+ 12028 "11111111" // /* MW 2 */
+ 12029 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12031 "00110110" // /* MW 3 */
+ 12032 "11111110" // /* MW 2 */
+ 12033 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12035 "01010110" // /* MW 3 */
+ 12036 "11111110" // /* MW 2 */
+ 12037 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "01110110" // /* MW 3 */
+ 12040 "01010110" // /* MW 2 */
+ 12041 "00000010" // /* MW 1 */
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+ 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12049 "00000000" // /* MW 1 */
+ 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "00010010" // /* MW 3 */
+ 12054 "10100011" // /* MW 2 */
+ 12055 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "00110001" // /* MW 3 */
+ 12058 "00000110" // /* MW 2 */
+ 12059 "00001010" // /* MW 1 */
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+ 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12063 "00000000" // /* MW 1 */
+ 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12065 "00000000" // /* MW 1 */
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12069 "00001000" // /* MW 3 */
+ 12070 "11010011" // /* MW 2 */
+ 12071 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12073 "01111001" // /* MW 9 */
+ 12074 "01100000" // /* MW 8 */
+ 12075 "11001110" // /* MW 7 */
+ 12076 "00101001" // /* MW 6 */
+ 12077 "00000000" // /* MW 5 */
+ 12078 "00000001" // /* MW 4 */
+ 12079 "01100000" // /* MW 3 */
+ 12080 "00010001" // /* MW 2 */
+ 12081 "11010001" // /* MW 1 */
+ 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12083 "00000000" // /* MW 1 */
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12087 "00011001" // /* MW 3 */
+ 12088 "11101110" // /* MW 2 */
+ 12089 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12091 "00111011" // /* MW 5 */
+ 12092 "11011000" // /* MW 4 */
+ 12093 "11011111" // /* MW 3 */
+ 12094 "11000110" // /* MW 2 */
+ 12095 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12097 "10000001" // /* MW 5 */
+ 12098 "11011101" // /* MW 4 */
+ 12099 "11010110" // /* MW 3 */
+ 12100 "11010010" // /* MW 2 */
+ 12101 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12103 "01010110" // /* MW 3 */
+ 12104 "01001110" // /* MW 2 */
+ 12105 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00011110" // /* MW 3 */
+ 12108 "01011101" // /* MW 2 */
+ 12109 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12111 "11000000" // /* MW 3 */
+ 12112 "01100000" // /* MW 2 */
+ 12113 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12115 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12117 "01110110" // /* MW 3 */
+ 12118 "00000110" // /* MW 2 */
+ 12119 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 12123 "00000001" // /* MW 5 */
+ 12124 "00000000" // /* MW 4 */
+ 12125 "01011000" // /* MW 3 */
+ 12126 "00010110" // /* MW 2 */
+ 12127 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12129 "11000000" // /* MW 3 */
+ 12130 "11010100" // /* MW 2 */
+ 12131 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00001101" // /* MW 3 */
+ 12134 "01100011" // /* MW 2 */
+ 12135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "00001101" // /* MW 3 */
+ 12138 "00100001" // /* MW 2 */
+ 12139 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12141 "01000001" // /* MW 3 */
+ 12142 "01101001" // /* MW 2 */
+ 12143 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12145 "00000000" // /* MW 15 */
+ 12146 "00000000" // /* MW 14 */
+ 12147 "10101000" // /* MW 13 */
+ 12148 "11100010" // /* MW 12 */
+ 12149 "00110100" // /* MW 11 */
+ 12150 "00000000" // /* MW 10 */
+ 12151 "00000000" // /* MW 9 */
+ 12152 "00000000" // /* MW 8 */
+ 12153 "01011011" // /* MW 7 */
+ 12154 "00000001" // /* MW 6 */
+ 12155 "00100000" // /* MW 5 */
+ 12156 "00000000" // /* MW 4 */
+ 12157 "11110000" // /* MW 3 */
+ 12158 "00101100" // /* MW 2 */
+ 12159 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12161 "01111000" // /* MW 9 */
+ 12162 "11010000" // /* MW 8 */
+ 12163 "10110011" // /* MW 7 */
+ 12164 "00101000" // /* MW 6 */
+ 12165 "00000000" // /* MW 5 */
+ 12166 "00000001" // /* MW 4 */
+ 12167 "11010000" // /* MW 3 */
+ 12168 "11000110" // /* MW 2 */
+ 12169 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12171 "11000000" // /* MW 5 */
+ 12172 "11001000" // /* MW 4 */
+ 12173 "11001100" // /* MW 3 */
+ 12174 "00000111" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+ 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12177 "00000000" // /* MW 1 */
+ 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12179 "00000000" // /* MW 1 */
+ 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12181 "00000000" // /* MW 1 */
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12187 "00001000" // /* MW 3 */
+ 12188 "01010001" // /* MW 2 */
+ 12189 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12191 "00110110" // /* MW 3 */
+ 12192 "11110110" // /* MW 2 */
+ 12193 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12195 "00011001" // /* MW 3 */
+ 12196 "11101101" // /* MW 2 */
+ 12197 "00000111" // /* MW 1 */
+ 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12199 "00000000" // /* MW 1 */
+ 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12201 "00000000" // /* MW 1 */
+ 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12203 "00000000" // /* MW 1 */
+ 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12205 "00000000" // /* MW 1 */
+ 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12207 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12209 "00010001" // /* MW 3 */
+ 12210 "00100011" // /* MW 2 */
+ 12211 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12213 "01100011" // /* MW 5 */
+ 12214 "11101100" // /* MW 4 */
+ 12215 "11010011" // /* MW 3 */
+ 12216 "11000110" // /* MW 2 */
+ 12217 "01001010" // /* MW 1 */
+ 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12219 "00000000" // /* MW 1 */
+ 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12221 "00000000" // /* MW 1 */
+ 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12223 "00000000" // /* MW 1 */
+ 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12225 "00000000" // /* MW 1 */
+ 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12227 "00000000" // /* MW 1 */
+ 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12229 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12231 "00001000" // /* MW 3 */
+ 12232 "01010001" // /* MW 2 */
+ 12233 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12235 "00010000" // /* MW 9 */
+ 12236 "00100000" // /* MW 8 */
+ 12237 "10110010" // /* MW 7 */
+ 12238 "11110000" // /* MW 6 */
+ 12239 "00000001" // /* MW 5 */
+ 12240 "00000000" // /* MW 4 */
+ 12241 "11010000" // /* MW 3 */
+ 12242 "11001110" // /* MW 2 */
+ 12243 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "01010110" // /* MW 3 */
+ 12246 "00000110" // /* MW 2 */
+ 12247 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12249 "00110110" // /* MW 3 */
+ 12250 "00000110" // /* MW 2 */
+ 12251 "00000001" // /* MW 1 */
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+ 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12257 "00000000" // /* MW 1 */
+ 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12259 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12261 "00110001" // /* MW 3 */
+ 12262 "00100001" // /* MW 2 */
+ 12263 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12265 "00010001" // /* MW 3 */
+ 12266 "11100110" // /* MW 2 */
+ 12267 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12269 "00101000" // /* MW 3 */
+ 12270 "01100001" // /* MW 2 */
+ 12271 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12273 "00000001" // /* MW 5 */
+ 12274 "01000000" // /* MW 4 */
+ 12275 "00001000" // /* MW 3 */
+ 12276 "00011000" // /* MW 2 */
+ 12277 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12285 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12289 "00000001" // /* MW 3 */
+ 12290 "00100000" // /* MW 2 */
+ 12291 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "11000001" // /* MW 11 */
+ 12294 "00001000" // /* MW 10 */
+ 12295 "10000011" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12305 "00111001" // /* MW 3 */
+ 12306 "11110000" // /* MW 2 */
+ 12307 "00000111" // /* MW 1 */
+ 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12309 "11110001" // /* MW 3 */
+ 12310 "11111101" // /* MW 2 */
+ 12311 "00000111" // /* MW 1 */
+ 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12313 "10011001" // /* MW 3 */
+ 12314 "11110111" // /* MW 2 */
+ 12315 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12317 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12319 "11010001" // /* MW 3 */
+ 12320 "11111001" // /* MW 2 */
+ 12321 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12327 "00000000" // /* MW 3 */
+ 12328 "00101000" // /* MW 2 */
+ 12329 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12331 "00001011" // /* MW 3 */
+ 12332 "10001110" // /* MW 2 */
+ 12333 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12335 "00000001" // /* MW 5 */
+ 12336 "00000000" // /* MW 4 */
+ 12337 "00000000" // /* MW 3 */
+ 12338 "11111000" // /* MW 2 */
+ 12339 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12343 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12345 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12353 "00010000" // /* MW 9 */
+ 12354 "11100000" // /* MW 8 */
+ 12355 "10110011" // /* MW 7 */
+ 12356 "11110000" // /* MW 6 */
+ 12357 "00000001" // /* MW 5 */
+ 12358 "00000000" // /* MW 4 */
+ 12359 "11010000" // /* MW 3 */
+ 12360 "10000101" // /* MW 2 */
+ 12361 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12363 "01011000" // /* MW 9 */
+ 12364 "00000000" // /* MW 8 */
+ 12365 "00001000" // /* MW 7 */
+ 12366 "01001011" // /* MW 6 */
+ 12367 "00000000" // /* MW 5 */
+ 12368 "00000001" // /* MW 4 */
+ 12369 "11010000" // /* MW 3 */
+ 12370 "10000001" // /* MW 2 */
+ 12371 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12373 "00000001" // /* MW 5 */
+ 12374 "00000000" // /* MW 4 */
+ 12375 "00000000" // /* MW 3 */
+ 12376 "00001000" // /* MW 2 */
+ 12377 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12379 "00010001" // /* MW 9 */
+ 12380 "11100000" // /* MW 8 */
+ 12381 "10110011" // /* MW 7 */
+ 12382 "11110011" // /* MW 6 */
+ 12383 "00000001" // /* MW 5 */
+ 12384 "00000000" // /* MW 4 */
+ 12385 "10110000" // /* MW 3 */
+ 12386 "11110011" // /* MW 2 */
+ 12387 "11111110" // /* MW 1 */
+ 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12389 "00111101" // /* MW 3 */
+ 12390 "11111100" // /* MW 2 */
+ 12391 "00001111" // /* MW 1 */
+ 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12393 "11110101" // /* MW 3 */
+ 12394 "11111001" // /* MW 2 */
+ 12395 "00001111" // /* MW 1 */
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12399 "00101001" // /* MW 3 */
+ 12400 "00011100" // /* MW 2 */
+ 12401 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12403 "00001001" // /* MW 3 */
+ 12404 "00011100" // /* MW 2 */
+ 12405 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12407 "00101110" // /* MW 3 */
+ 12408 "00000100" // /* MW 2 */
+ 12409 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12411 "00001110" // /* MW 3 */
+ 12412 "00010100" // /* MW 2 */
+ 12413 "00000000" // /* MW 1 */
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12415 "00000000" // /* MW 1 */
+ 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12417 "00000000" // /* MW 1 */
+ 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12419 "00000000" // /* MW 1 */
+ 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12421 "00000000" // /* MW 1 */
+ 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12423 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00101001" // /* MW 3 */
+ 12426 "00000100" // /* MW 2 */
+ 12427 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00001001" // /* MW 3 */
+ 12430 "00010100" // /* MW 2 */
+ 12431 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12433 "00101010" // /* MW 3 */
+ 12434 "01011110" // /* MW 2 */
+ 12435 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12437 "01001010" // /* MW 3 */
+ 12438 "11101110" // /* MW 2 */
+ 12439 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12441 "00101010" // /* MW 3 */
+ 12442 "11101100" // /* MW 2 */
+ 12443 "00000111" // /* MW 1 */
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+ 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12449 "00000000" // /* MW 1 */
+ 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12451 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12453 "00000001" // /* MW 5 */
+ 12454 "00000000" // /* MW 4 */
+ 12455 "10011000" // /* MW 3 */
+ 12456 "00011110" // /* MW 2 */
+ 12457 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12459 "01000011" // /* MW 5 */
+ 12460 "10111110" // /* MW 4 */
+ 12461 "10111000" // /* MW 3 */
+ 12462 "11001010" // /* MW 2 */
+ 12463 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12465 "00010001" // /* MW 5 */
+ 12466 "11000010" // /* MW 4 */
+ 12467 "10110000" // /* MW 3 */
+ 12468 "10000110" // /* MW 2 */
+ 12469 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12471 "00010101" // /* MW 5 */
+ 12472 "11101111" // /* MW 4 */
+ 12473 "10110111" // /* MW 3 */
+ 12474 "01000010" // /* MW 2 */
+ 12475 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12477 "11110001" // /* MW 3 */
+ 12478 "00100010" // /* MW 2 */
+ 12479 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12481 "00000000" // /* MW 15 */
+ 12482 "00000000" // /* MW 14 */
+ 12483 "01111000" // /* MW 13 */
+ 12484 "10100101" // /* MW 12 */
+ 12485 "00000001" // /* MW 11 */
+ 12486 "10010000" // /* MW 10 */
+ 12487 "00001000" // /* MW 9 */
+ 12488 "00011110" // /* MW 8 */
+ 12489 "01011011" // /* MW 7 */
+ 12490 "00000001" // /* MW 6 */
+ 12491 "00100000" // /* MW 5 */
+ 12492 "00000000" // /* MW 4 */
+ 12493 "11110000" // /* MW 3 */
+ 12494 "00101100" // /* MW 2 */
+ 12495 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12497 "00000010" // /* MW 5 */
+ 12498 "01000000" // /* MW 4 */
+ 12499 "00100000" // /* MW 3 */
+ 12500 "11010010" // /* MW 2 */
+ 12501 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12503 "01000011" // /* MW 5 */
+ 12504 "01001000" // /* MW 4 */
+ 12505 "01011000" // /* MW 3 */
+ 12506 "11000101" // /* MW 2 */
+ 12507 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12509 "01101010" // /* MW 3 */
+ 12510 "11101110" // /* MW 2 */
+ 12511 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12513 "00110001" // /* MW 3 */
+ 12514 "11101100" // /* MW 2 */
+ 12515 "00000111" // /* MW 1 */
+ 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12517 "00000000" // /* MW 1 */
+ 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12519 "00000000" // /* MW 1 */
+ 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12521 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12523 "01000110" // /* MW 3 */
+ 12524 "11101001" // /* MW 2 */
+ 12525 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12527 "00001010" // /* MW 3 */
+ 12528 "00110111" // /* MW 2 */
+ 12529 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12531 "01100011" // /* MW 5 */
+ 12532 "11000110" // /* MW 4 */
+ 12533 "10111000" // /* MW 3 */
+ 12534 "01001110" // /* MW 2 */
+ 12535 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12537 "01000001" // /* MW 9 */
+ 12538 "00000000" // /* MW 8 */
+ 12539 "00000000" // /* MW 7 */
+ 12540 "10100110" // /* MW 6 */
+ 12541 "00000111" // /* MW 5 */
+ 12542 "00000000" // /* MW 4 */
+ 12543 "10110000" // /* MW 3 */
+ 12544 "01000110" // /* MW 2 */
+ 12545 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12547 "00100010" // /* MW 3 */
+ 12548 "10101001" // /* MW 2 */
+ 12549 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12551 "00001010" // /* MW 3 */
+ 12552 "01110111" // /* MW 2 */
+ 12553 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12555 "00010001" // /* MW 3 */
+ 12556 "00100101" // /* MW 2 */
+ 12557 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12559 "01110000" // /* MW 3 */
+ 12560 "00100110" // /* MW 2 */
+ 12561 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12563 "01100000" // /* MW 13 */
+ 12564 "00101011" // /* MW 12 */
+ 12565 "00000000" // /* MW 11 */
+ 12566 "00001001" // /* MW 10 */
+ 12567 "10011000" // /* MW 9 */
+ 12568 "00111101" // /* MW 8 */
+ 12569 "00100010" // /* MW 7 */
+ 12570 "01000001" // /* MW 6 */
+ 12571 "00100100" // /* MW 5 */
+ 12572 "00000000" // /* MW 4 */
+ 12573 "11110000" // /* MW 3 */
+ 12574 "00101100" // /* MW 2 */
+ 12575 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12577 "01011000" // /* MW 9 */
+ 12578 "01000010" // /* MW 8 */
+ 12579 "00000000" // /* MW 7 */
+ 12580 "11001000" // /* MW 6 */
+ 12581 "00110111" // /* MW 5 */
+ 12582 "00111111" // /* MW 4 */
+ 12583 "00100000" // /* MW 3 */
+ 12584 "00001110" // /* MW 2 */
+ 12585 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12587 "01011000" // /* MW 9 */
+ 12588 "11111100" // /* MW 8 */
+ 12589 "00101001" // /* MW 7 */
+ 12590 "00001000" // /* MW 6 */
+ 12591 "10000000" // /* MW 5 */
+ 12592 "00000001" // /* MW 4 */
+ 12593 "00100000" // /* MW 3 */
+ 12594 "11000010" // /* MW 2 */
+ 12595 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12597 "01011000" // /* MW 9 */
+ 12598 "00000010" // /* MW 8 */
+ 12599 "10001000" // /* MW 7 */
+ 12600 "10001000" // /* MW 6 */
+ 12601 "01100000" // /* MW 5 */
+ 12602 "00000000" // /* MW 4 */
+ 12603 "00100000" // /* MW 3 */
+ 12604 "11011010" // /* MW 2 */
+ 12605 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12607 "01011000" // /* MW 9 */
+ 12608 "00010111" // /* MW 8 */
+ 12609 "10001000" // /* MW 7 */
+ 12610 "00001011" // /* MW 6 */
+ 12611 "01010001" // /* MW 5 */
+ 12612 "00000000" // /* MW 4 */
+ 12613 "01010000" // /* MW 3 */
+ 12614 "01000101" // /* MW 2 */
+ 12615 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12617 "01011000" // /* MW 9 */
+ 12618 "00100000" // /* MW 8 */
+ 12619 "10000000" // /* MW 7 */
+ 12620 "01001000" // /* MW 6 */
+ 12621 "00100111" // /* MW 5 */
+ 12622 "00111111" // /* MW 4 */
+ 12623 "00100000" // /* MW 3 */
+ 12624 "01010110" // /* MW 2 */
+ 12625 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12627 "01011000" // /* MW 9 */
+ 12628 "00000001" // /* MW 8 */
+ 12629 "01001000" // /* MW 7 */
+ 12630 "11001011" // /* MW 6 */
+ 12631 "01110000" // /* MW 5 */
+ 12632 "00000001" // /* MW 4 */
+ 12633 "00100000" // /* MW 3 */
+ 12634 "01111010" // /* MW 2 */
+ 12635 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12637 "01011000" // /* MW 9 */
+ 12638 "11000000" // /* MW 8 */
+ 12639 "11101111" // /* MW 7 */
+ 12640 "00001011" // /* MW 6 */
+ 12641 "11010000" // /* MW 5 */
+ 12642 "00000101" // /* MW 4 */
+ 12643 "10000000" // /* MW 3 */
+ 12644 "11000000" // /* MW 2 */
+ 12645 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12647 "00100001" // /* MW 3 */
+ 12648 "00101000" // /* MW 2 */
+ 12649 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12651 "00000110" // /* MW 3 */
+ 12652 "11000111" // /* MW 2 */
+ 12653 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12655 "00000010" // /* MW 5 */
+ 12656 "00110110" // /* MW 4 */
+ 12657 "01010000" // /* MW 3 */
+ 12658 "11110001" // /* MW 2 */
+ 12659 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12661 "11110101" // /* MW 5 */
+ 12662 "00111111" // /* MW 4 */
+ 12663 "01001011" // /* MW 3 */
+ 12664 "00101000" // /* MW 2 */
+ 12665 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12667 "00011101" // /* MW 5 */
+ 12668 "00100000" // /* MW 4 */
+ 12669 "11110001" // /* MW 3 */
+ 12670 "11100001" // /* MW 2 */
+ 12671 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12673 "01110000" // /* MW 3 */
+ 12674 "00101000" // /* MW 2 */
+ 12675 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12677 "00000001" // /* MW 5 */
+ 12678 "10100000" // /* MW 4 */
+ 12679 "10010000" // /* MW 3 */
+ 12680 "00000000" // /* MW 2 */
+ 12681 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000001" // /* MW 5 */
+ 12684 "10110100" // /* MW 4 */
+ 12685 "10111101" // /* MW 3 */
+ 12686 "11100111" // /* MW 2 */
+ 12687 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12689 "00000010" // /* MW 5 */
+ 12690 "10100011" // /* MW 4 */
+ 12691 "10110000" // /* MW 3 */
+ 12692 "00001101" // /* MW 2 */
+ 12693 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12695 "11111111" // /* MW 5 */
+ 12696 "00110101" // /* MW 4 */
+ 12697 "10110000" // /* MW 3 */
+ 12698 "11001101" // /* MW 2 */
+ 12699 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12701 "00001111" // /* MW 3 */
+ 12702 "11001101" // /* MW 2 */
+ 12703 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12705 "00011111" // /* MW 3 */
+ 12706 "11011111" // /* MW 2 */
+ 12707 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12709 "11111111" // /* MW 5 */
+ 12710 "10110011" // /* MW 4 */
+ 12711 "11111001" // /* MW 3 */
+ 12712 "01101011" // /* MW 2 */
+ 12713 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12715 "00000111" // /* MW 3 */
+ 12716 "00110111" // /* MW 2 */
+ 12717 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12719 "11011111" // /* MW 5 */
+ 12720 "10010000" // /* MW 4 */
+ 12721 "00110111" // /* MW 3 */
+ 12722 "11010110" // /* MW 2 */
+ 12723 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12725 "01010010" // /* MW 3 */
+ 12726 "00111000" // /* MW 2 */
+ 12727 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12729 "00101101" // /* MW 3 */
+ 12730 "00100101" // /* MW 2 */
+ 12731 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00111111" // /* MW 5 */
+ 12734 "11001000" // /* MW 4 */
+ 12735 "00111000" // /* MW 3 */
+ 12736 "01001010" // /* MW 2 */
+ 12737 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12739 "11111011" // /* MW 5 */
+ 12740 "01110010" // /* MW 4 */
+ 12741 "00111111" // /* MW 3 */
+ 12742 "11110010" // /* MW 2 */
+ 12743 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12745 "00011111" // /* MW 5 */
+ 12746 "01110000" // /* MW 4 */
+ 12747 "00111001" // /* MW 3 */
+ 12748 "11110010" // /* MW 2 */
+ 12749 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12751 "11111011" // /* MW 5 */
+ 12752 "11001110" // /* MW 4 */
+ 12753 "00111001" // /* MW 3 */
+ 12754 "11001110" // /* MW 2 */
+ 12755 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12757 "11101010" // /* MW 5 */
+ 12758 "10110011" // /* MW 4 */
+ 12759 "10111001" // /* MW 3 */
+ 12760 "00110101" // /* MW 2 */
+ 12761 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12763 "01011011" // /* MW 5 */
+ 12764 "01111011" // /* MW 4 */
+ 12765 "00111001" // /* MW 3 */
+ 12766 "11111110" // /* MW 2 */
+ 12767 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12769 "11100010" // /* MW 5 */
+ 12770 "00110011" // /* MW 4 */
+ 12771 "11111001" // /* MW 3 */
+ 12772 "00100001" // /* MW 2 */
+ 12773 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12775 "00000100" // /* MW 5 */
+ 12776 "11110011" // /* MW 4 */
+ 12777 "00111111" // /* MW 3 */
+ 12778 "10000010" // /* MW 2 */
+ 12779 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12781 "01101101" // /* MW 3 */
+ 12782 "11111111" // /* MW 2 */
+ 12783 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12785 "11111111" // /* MW 5 */
+ 12786 "10111111" // /* MW 4 */
+ 12787 "00111001" // /* MW 3 */
+ 12788 "01100110" // /* MW 2 */
+ 12789 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12791 "11011011" // /* MW 5 */
+ 12792 "11000110" // /* MW 4 */
+ 12793 "00111000" // /* MW 3 */
+ 12794 "10000110" // /* MW 2 */
+ 12795 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12797 "11111111" // /* MW 5 */
+ 12798 "00110001" // /* MW 4 */
+ 12799 "00111001" // /* MW 3 */
+ 12800 "10100100" // /* MW 2 */
+ 12801 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12803 "11000011" // /* MW 5 */
+ 12804 "11011011" // /* MW 4 */
+ 12805 "00110011" // /* MW 3 */
+ 12806 "11011010" // /* MW 2 */
+ 12807 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12809 "01011011" // /* MW 5 */
+ 12810 "01000011" // /* MW 4 */
+ 12811 "00111000" // /* MW 3 */
+ 12812 "11001010" // /* MW 2 */
+ 12813 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12815 "01011011" // /* MW 5 */
+ 12816 "11111100" // /* MW 4 */
+ 12817 "00111001" // /* MW 3 */
+ 12818 "10011110" // /* MW 2 */
+ 12819 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12821 "11000001" // /* MW 5 */
+ 12822 "11011010" // /* MW 4 */
+ 12823 "00111110" // /* MW 3 */
+ 12824 "11001110" // /* MW 2 */
+ 12825 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12827 "11110010" // /* MW 5 */
+ 12828 "10111111" // /* MW 4 */
+ 12829 "00011110" // /* MW 3 */
+ 12830 "00100000" // /* MW 2 */
+ 12831 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12833 "10100011" // /* MW 5 */
+ 12834 "01000011" // /* MW 4 */
+ 12835 "00111000" // /* MW 3 */
+ 12836 "11011010" // /* MW 2 */
+ 12837 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12839 "01011001" // /* MW 9 */
+ 12840 "11111111" // /* MW 8 */
+ 12841 "00001111" // /* MW 7 */
+ 12842 "01101110" // /* MW 6 */
+ 12843 "01101101" // /* MW 5 */
+ 12844 "00011111" // /* MW 4 */
+ 12845 "00110000" // /* MW 3 */
+ 12846 "11000010" // /* MW 2 */
+ 12847 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12849 "10000001" // /* MW 5 */
+ 12850 "01101010" // /* MW 4 */
+ 12851 "00111110" // /* MW 3 */
+ 12852 "11001010" // /* MW 2 */
+ 12853 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12855 "11000011" // /* MW 5 */
+ 12856 "01010010" // /* MW 4 */
+ 12857 "00111010" // /* MW 3 */
+ 12858 "11101010" // /* MW 2 */
+ 12859 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12861 "00001000" // /* MW 11 */
+ 12862 "00010000" // /* MW 10 */
+ 12863 "01101101" // /* MW 9 */
+ 12864 "10110010" // /* MW 8 */
+ 12865 "00001000" // /* MW 7 */
+ 12866 "10101011" // /* MW 6 */
+ 12867 "01110001" // /* MW 5 */
+ 12868 "00011110" // /* MW 4 */
+ 12869 "00000111" // /* MW 3 */
+ 12870 "00010001" // /* MW 2 */
+ 12871 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "01110001" // /* MW 3 */
+ 12874 "00011110" // /* MW 2 */
+ 12875 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12877 "11111011" // /* MW 5 */
+ 12878 "01010010" // /* MW 4 */
+ 12879 "00111000" // /* MW 3 */
+ 12880 "11000110" // /* MW 2 */
+ 12881 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12883 "10000011" // /* MW 5 */
+ 12884 "01000010" // /* MW 4 */
+ 12885 "00111100" // /* MW 3 */
+ 12886 "11000010" // /* MW 2 */
+ 12887 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12889 "11111011" // /* MW 5 */
+ 12890 "01010010" // /* MW 4 */
+ 12891 "00111001" // /* MW 3 */
+ 12892 "11000110" // /* MW 2 */
+ 12893 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12895 "10000011" // /* MW 5 */
+ 12896 "01000010" // /* MW 4 */
+ 12897 "00111100" // /* MW 3 */
+ 12898 "11000010" // /* MW 2 */
+ 12899 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12901 "01010001" // /* MW 3 */
+ 12902 "00011110" // /* MW 2 */
+ 12903 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12905 "00110001" // /* MW 3 */
+ 12906 "00011110" // /* MW 2 */
+ 12907 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12909 "00010001" // /* MW 3 */
+ 12910 "00001010" // /* MW 2 */
+ 12911 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12913 "00001010" // /* MW 3 */
+ 12914 "00000110" // /* MW 2 */
+ 12915 "00000111" // /* MW 1 */
+ 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12917 "00000000" // /* MW 1 */
+ 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12919 "00000000" // /* MW 1 */
+ 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12921 "00000000" // /* MW 1 */
+ 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12923 "00000000" // /* MW 1 */
+ 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12925 "00000000" // /* MW 1 */
+ 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12927 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */
+ 12929 "00000001" // /* MW 5 */
+ 12930 "00000000" // /* MW 4 */
+ 12931 "01010000" // /* MW 3 */
+ 12932 "00011001" // /* MW 2 */
+ 12933 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12935 "01100000" // /* MW 3 */
+ 12936 "00111011" // /* MW 2 */
+ 12937 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12939 "00000000" // /* MW 5 */
+ 12940 "10100000" // /* MW 4 */
+ 12941 "00001001" // /* MW 3 */
+ 12942 "01111111" // /* MW 2 */
+ 12943 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12949 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12951 "00000001" // /* MW 9 */
+ 12952 "00100110" // /* MW 8 */
+ 12953 "00000000" // /* MW 7 */
+ 12954 "00000000" // /* MW 6 */
+ 12955 "01011011" // /* MW 5 */
+ 12956 "00000001" // /* MW 4 */
+ 12957 "11110000" // /* MW 3 */
+ 12958 "00101100" // /* MW 2 */
+ 12959 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12961 "00010000" // /* MW 9 */
+ 12962 "00110100" // /* MW 8 */
+ 12963 "00110010" // /* MW 7 */
+ 12964 "11110000" // /* MW 6 */
+ 12965 "00000001" // /* MW 5 */
+ 12966 "00000000" // /* MW 4 */
+ 12967 "00100000" // /* MW 3 */
+ 12968 "10000111" // /* MW 2 */
+ 12969 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12971 "11100010" // /* MW 5 */
+ 12972 "00000100" // /* MW 4 */
+ 12973 "01010000" // /* MW 3 */
+ 12974 "11000000" // /* MW 2 */
+ 12975 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12977 "11101001" // /* MW 5 */
+ 12978 "00000010" // /* MW 4 */
+ 12979 "00100001" // /* MW 3 */
+ 12980 "10000011" // /* MW 2 */
+ 12981 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12983 "00100101" // /* MW 5 */
+ 12984 "00000001" // /* MW 4 */
+ 12985 "00100000" // /* MW 3 */
+ 12986 "00111110" // /* MW 2 */
+ 12987 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12989 "00000001" // /* MW 5 */
+ 12990 "00000000" // /* MW 4 */
+ 12991 "00000000" // /* MW 3 */
+ 12992 "11111000" // /* MW 2 */
+ 12993 "11111111" // /* MW 1 */
+ 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12995 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12997 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12999 "00010111" // /* MW 3 */
+ 13000 "00000010" // /* MW 2 */
+ 13001 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13003 "01000001" // /* MW 5 */
+ 13004 "01110000" // /* MW 4 */
+ 13005 "00001111" // /* MW 3 */
+ 13006 "00000000" // /* MW 2 */
+ 13007 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13009 "00010110" // /* MW 3 */
+ 13010 "01000000" // /* MW 2 */
+ 13011 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13013 "11000000" // /* MW 3 */
+ 13014 "01100000" // /* MW 2 */
+ 13015 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13017 "00000001" // /* MW 3 */
+ 13018 "00000001" // /* MW 2 */
+ 13019 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 13023 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13025 "11000000" // /* MW 3 */
+ 13026 "01010110" // /* MW 2 */
+ 13027 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13029 "10101001" // /* MW 5 */
+ 13030 "00000001" // /* MW 4 */
+ 13031 "11011110" // /* MW 3 */
+ 13032 "10010011" // /* MW 2 */
+ 13033 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13035 "00000010" // /* MW 5 */
+ 13036 "11010001" // /* MW 4 */
+ 13037 "11010110" // /* MW 3 */
+ 13038 "10000011" // /* MW 2 */
+ 13039 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13041 "10001010" // /* MW 3 */
+ 13042 "11101000" // /* MW 2 */
+ 13043 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13045 "01000110" // /* MW 3 */
+ 13046 "11111101" // /* MW 2 */
+ 13047 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13049 "00100110" // /* MW 3 */
+ 13050 "00111101" // /* MW 2 */
+ 13051 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13053 "01000110" // /* MW 3 */
+ 13054 "11111111" // /* MW 2 */
+ 13055 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13057 "00100110" // /* MW 3 */
+ 13058 "00101111" // /* MW 2 */
+ 13059 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13061 "00000110" // /* MW 3 */
+ 13062 "00101101" // /* MW 2 */
+ 13063 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13065 "01000110" // /* MW 3 */
+ 13066 "11111100" // /* MW 2 */
+ 13067 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13069 "00100110" // /* MW 3 */
+ 13070 "00111100" // /* MW 2 */
+ 13071 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13073 "01000110" // /* MW 3 */
+ 13074 "11111110" // /* MW 2 */
+ 13075 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13077 "00100110" // /* MW 3 */
+ 13078 "00101110" // /* MW 2 */
+ 13079 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13081 "00000110" // /* MW 3 */
+ 13082 "00101100" // /* MW 2 */
+ 13083 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13085 "11000110" // /* MW 3 */
+ 13086 "11111100" // /* MW 2 */
+ 13087 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13089 "10100110" // /* MW 3 */
+ 13090 "00111100" // /* MW 2 */
+ 13091 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13093 "11000110" // /* MW 3 */
+ 13094 "11111110" // /* MW 2 */
+ 13095 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13097 "10100110" // /* MW 3 */
+ 13098 "00101110" // /* MW 2 */
+ 13099 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13101 "10000110" // /* MW 3 */
+ 13102 "00101100" // /* MW 2 */
+ 13103 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13105 "11000110" // /* MW 3 */
+ 13106 "11111111" // /* MW 2 */
+ 13107 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13109 "10100110" // /* MW 3 */
+ 13110 "00101111" // /* MW 2 */
+ 13111 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13113 "00010000" // /* MW 9 */
+ 13114 "00110100" // /* MW 8 */
+ 13115 "00110010" // /* MW 7 */
+ 13116 "11110010" // /* MW 6 */
+ 13117 "00000001" // /* MW 5 */
+ 13118 "00000000" // /* MW 4 */
+ 13119 "11010000" // /* MW 3 */
+ 13120 "11110000" // /* MW 2 */
+ 13121 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13123 "10000001" // /* MW 5 */
+ 13124 "11000101" // /* MW 4 */
+ 13125 "01011000" // /* MW 3 */
+ 13126 "10011000" // /* MW 2 */
+ 13127 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13129 "00010000" // /* MW 3 */
+ 13130 "00001111" // /* MW 2 */
+ 13131 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13133 "01011000" // /* MW 11 */
+ 13134 "00000000" // /* MW 10 */
+ 13135 "01100000" // /* MW 9 */
+ 13136 "01101010" // /* MW 8 */
+ 13137 "00100000" // /* MW 7 */
+ 13138 "00000000" // /* MW 6 */
+ 13139 "01101000" // /* MW 5 */
+ 13140 "00111011" // /* MW 4 */
+ 13141 "01110000" // /* MW 3 */
+ 13142 "10000101" // /* MW 2 */
+ 13143 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 13145 "01100000" // /* MW 13 */
+ 13146 "00001001" // /* MW 12 */
+ 13147 "01100010" // /* MW 11 */
+ 13148 "00001011" // /* MW 10 */
+ 13149 "00010000" // /* MW 9 */
+ 13150 "11100000" // /* MW 8 */
+ 13151 "00101101" // /* MW 7 */
+ 13152 "00000100" // /* MW 6 */
+ 13153 "11101001" // /* MW 5 */
+ 13154 "00111000" // /* MW 4 */
+ 13155 "11010000" // /* MW 3 */
+ 13156 "10111000" // /* MW 2 */
+ 13157 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13159 "01110010" // /* MW 9 */
+ 13160 "10010000" // /* MW 8 */
+ 13161 "10000000" // /* MW 7 */
+ 13162 "00000010" // /* MW 6 */
+ 13163 "01001011" // /* MW 5 */
+ 13164 "00001100" // /* MW 4 */
+ 13165 "11010001" // /* MW 3 */
+ 13166 "10110100" // /* MW 2 */
+ 13167 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13169 "01111110" // /* MW 9 */
+ 13170 "11000000" // /* MW 8 */
+ 13171 "11100001" // /* MW 7 */
+ 13172 "00000011" // /* MW 6 */
+ 13173 "10010000" // /* MW 5 */
+ 13174 "10101011" // /* MW 4 */
+ 13175 "11010001" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13179 "01011110" // /* MW 9 */
+ 13180 "10010000" // /* MW 8 */
+ 13181 "00000111" // /* MW 7 */
+ 13182 "00000010" // /* MW 6 */
+ 13183 "11110100" // /* MW 5 */
+ 13184 "11110000" // /* MW 4 */
+ 13185 "11010001" // /* MW 3 */
+ 13186 "00001010" // /* MW 2 */
+ 13187 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13189 "10000010" // /* MW 5 */
+ 13190 "00000000" // /* MW 4 */
+ 13191 "01010000" // /* MW 3 */
+ 13192 "00011110" // /* MW 2 */
+ 13193 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13195 "00010000" // /* MW 11 */
+ 13196 "11111000" // /* MW 10 */
+ 13197 "01111001" // /* MW 9 */
+ 13198 "00001100" // /* MW 8 */
+ 13199 "00000000" // /* MW 7 */
+ 13200 "00000000" // /* MW 6 */
+ 13201 "01001011" // /* MW 5 */
+ 13202 "00010000" // /* MW 4 */
+ 13203 "11010110" // /* MW 3 */
+ 13204 "11000000" // /* MW 2 */
+ 13205 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13207 "00010000" // /* MW 11 */
+ 13208 "00101000" // /* MW 10 */
+ 13209 "10111010" // /* MW 9 */
+ 13210 "00001101" // /* MW 8 */
+ 13211 "00000000" // /* MW 7 */
+ 13212 "00000000" // /* MW 6 */
+ 13213 "01001011" // /* MW 5 */
+ 13214 "00010000" // /* MW 4 */
+ 13215 "11010010" // /* MW 3 */
+ 13216 "10010010" // /* MW 2 */
+ 13217 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13219 "00000101" // /* MW 5 */
+ 13220 "01100001" // /* MW 4 */
+ 13221 "10000100" // /* MW 3 */
+ 13222 "00010110" // /* MW 2 */
+ 13223 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13225 "10001010" // /* MW 3 */
+ 13226 "00000000" // /* MW 2 */
+ 13227 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13229 "00010000" // /* MW 9 */
+ 13230 "01001000" // /* MW 8 */
+ 13231 "10110010" // /* MW 7 */
+ 13232 "00001101" // /* MW 6 */
+ 13233 "00000000" // /* MW 5 */
+ 13234 "00000000" // /* MW 4 */
+ 13235 "11010000" // /* MW 3 */
+ 13236 "10010110" // /* MW 2 */
+ 13237 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13239 "10101000" // /* MW 9 */
+ 13240 "00000001" // /* MW 8 */
+ 13241 "10001110" // /* MW 7 */
+ 13242 "00001010" // /* MW 6 */
+ 13243 "00010100" // /* MW 5 */
+ 13244 "00000000" // /* MW 4 */
+ 13245 "11110000" // /* MW 3 */
+ 13246 "00101100" // /* MW 2 */
+ 13247 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13249 "00000000" // /* MW 15 */
+ 13250 "00000000" // /* MW 14 */
+ 13251 "01111000" // /* MW 13 */
+ 13252 "10111001" // /* MW 12 */
+ 13253 "00001110" // /* MW 11 */
+ 13254 "00001000" // /* MW 10 */
+ 13255 "00110110" // /* MW 9 */
+ 13256 "00000000" // /* MW 8 */
+ 13257 "01011011" // /* MW 7 */
+ 13258 "00000001" // /* MW 6 */
+ 13259 "00100000" // /* MW 5 */
+ 13260 "00000000" // /* MW 4 */
+ 13261 "00000000" // /* MW 3 */
+ 13262 "10010001" // /* MW 2 */
+ 13263 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13265 "01101010" // /* MW 15 */
+ 13266 "01100011" // /* MW 14 */
+ 13267 "10101100" // /* MW 13 */
+ 13268 "00000011" // /* MW 12 */
+ 13269 "00001110" // /* MW 11 */
+ 13270 "00000010" // /* MW 10 */
+ 13271 "11010100" // /* MW 9 */
+ 13272 "00001101" // /* MW 8 */
+ 13273 "01001011" // /* MW 7 */
+ 13274 "00010000" // /* MW 6 */
+ 13275 "00100000" // /* MW 5 */
+ 13276 "00000000" // /* MW 4 */
+ 13277 "11110000" // /* MW 3 */
+ 13278 "00101100" // /* MW 2 */
+ 13279 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13281 "00011010" // /* MW 15 */
+ 13282 "01001000" // /* MW 14 */
+ 13283 "11001100" // /* MW 13 */
+ 13284 "00111111" // /* MW 12 */
+ 13285 "10111001" // /* MW 11 */
+ 13286 "11011010" // /* MW 10 */
+ 13287 "00101111" // /* MW 9 */
+ 13288 "00000100" // /* MW 8 */
+ 13289 "01001011" // /* MW 7 */
+ 13290 "00010000" // /* MW 6 */
+ 13291 "00100101" // /* MW 5 */
+ 13292 "00000000" // /* MW 4 */
+ 13293 "11010000" // /* MW 3 */
+ 13294 "10100011" // /* MW 2 */
+ 13295 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13367 "01101110" // /* MW 9 */
+ 13368 "01000001" // /* MW 8 */
+ 13369 "00011000" // /* MW 7 */
+ 13370 "00000001" // /* MW 6 */
+ 13371 "00010000" // /* MW 5 */
+ 13372 "00000000" // /* MW 4 */
+ 13373 "11110000" // /* MW 3 */
+ 13374 "00101100" // /* MW 2 */
+ 13375 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "01101010" // /* MW 15 */
+ 13378 "01100011" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13393 "00011010" // /* MW 15 */
+ 13394 "01001000" // /* MW 14 */
+ 13395 "01111100" // /* MW 13 */
+ 13396 "10100101" // /* MW 12 */
+ 13397 "00000001" // /* MW 11 */
+ 13398 "00000000" // /* MW 10 */
+ 13399 "00000000" // /* MW 9 */
+ 13400 "00000000" // /* MW 8 */
+ 13401 "01011011" // /* MW 7 */
+ 13402 "00000001" // /* MW 6 */
+ 13403 "00100000" // /* MW 5 */
+ 13404 "00000000" // /* MW 4 */
+ 13405 "11110000" // /* MW 3 */
+ 13406 "00101100" // /* MW 2 */
+ 13407 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13409 "01101110" // /* MW 9 */
+ 13410 "10000001" // /* MW 8 */
+ 13411 "10000100" // /* MW 7 */
+ 13412 "00000010" // /* MW 6 */
+ 13413 "10010000" // /* MW 5 */
+ 13414 "01110011" // /* MW 4 */
+ 13415 "11110100" // /* MW 3 */
+ 13416 "00001100" // /* MW 2 */
+ 13417 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13419 "00000001" // /* MW 7 */
+ 13420 "10001001" // /* MW 6 */
+ 13421 "10001010" // /* MW 5 */
+ 13422 "01000110" // /* MW 4 */
+ 13423 "00001011" // /* MW 3 */
+ 13424 "10011100" // /* MW 2 */
+ 13425 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13427 "00000001" // /* MW 7 */
+ 13428 "00110101" // /* MW 6 */
+ 13429 "10001001" // /* MW 5 */
+ 13430 "11000110" // /* MW 4 */
+ 13431 "10000110" // /* MW 3 */
+ 13432 "00110000" // /* MW 2 */
+ 13433 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13435 "00000110" // /* MW 3 */
+ 13436 "10001001" // /* MW 2 */
+ 13437 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13439 "10100001" // /* MW 7 */
+ 13440 "01001000" // /* MW 6 */
+ 13441 "10001100" // /* MW 5 */
+ 13442 "01000110" // /* MW 4 */
+ 13443 "00001111" // /* MW 3 */
+ 13444 "10011100" // /* MW 2 */
+ 13445 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13447 "10100001" // /* MW 9 */
+ 13448 "00110110" // /* MW 8 */
+ 13449 "10001010" // /* MW 7 */
+ 13450 "11000010" // /* MW 6 */
+ 13451 "10001110" // /* MW 5 */
+ 13452 "10110000" // /* MW 4 */
+ 13453 "11110100" // /* MW 3 */
+ 13454 "00101100" // /* MW 2 */
+ 13455 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13457 "00011101" // /* MW 5 */
+ 13458 "00010010" // /* MW 4 */
+ 13459 "10001011" // /* MW 3 */
+ 13460 "00011110" // /* MW 2 */
+ 13461 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13463 "11100001" // /* MW 9 */
+ 13464 "10010010" // /* MW 8 */
+ 13465 "10001011" // /* MW 7 */
+ 13466 "00000010" // /* MW 6 */
+ 13467 "01010100" // /* MW 5 */
+ 13468 "10110111" // /* MW 4 */
+ 13469 "00000001" // /* MW 3 */
+ 13470 "00000000" // /* MW 2 */
+ 13471 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13473 "11100001" // /* MW 11 */
+ 13474 "01010110" // /* MW 10 */
+ 13475 "10001000" // /* MW 9 */
+ 13476 "00000010" // /* MW 8 */
+ 13477 "01001111" // /* MW 7 */
+ 13478 "10001111" // /* MW 6 */
+ 13479 "00000001" // /* MW 5 */
+ 13480 "00000000" // /* MW 4 */
+ 13481 "01110000" // /* MW 3 */
+ 13482 "10000101" // /* MW 2 */
+ 13483 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13485 "01111111" // /* MW 3 */
+ 13486 "01110010" // /* MW 2 */
+ 13487 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13489 "10011011" // /* MW 3 */
+ 13490 "00011101" // /* MW 2 */
+ 13491 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13493 "01110100" // /* MW 3 */
+ 13494 "00011100" // /* MW 2 */
+ 13495 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13497 "10110100" // /* MW 3 */
+ 13498 "01011000" // /* MW 2 */
+ 13499 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13501 "10010110" // /* MW 3 */
+ 13502 "00010001" // /* MW 2 */
+ 13503 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13505 "00010110" // /* MW 3 */
+ 13506 "00010000" // /* MW 2 */
+ 13507 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13509 "01101100" // /* MW 3 */
+ 13510 "01010000" // /* MW 2 */
+ 13511 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13513 "00010100" // /* MW 3 */
+ 13514 "01010011" // /* MW 2 */
+ 13515 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13517 "01110000" // /* MW 7 */
+ 13518 "00110110" // /* MW 6 */
+ 13519 "10101000" // /* MW 5 */
+ 13520 "00000010" // /* MW 4 */
+ 13521 "01100000" // /* MW 3 */
+ 13522 "01000010" // /* MW 2 */
+ 13523 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13525 "00000011" // /* MW 3 */
+ 13526 "00011100" // /* MW 2 */
+ 13527 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13529 "01110000" // /* MW 7 */
+ 13530 "01000101" // /* MW 6 */
+ 13531 "10000000" // /* MW 5 */
+ 13532 "00000001" // /* MW 4 */
+ 13533 "01100000" // /* MW 3 */
+ 13534 "01010010" // /* MW 2 */
+ 13535 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13537 "01000001" // /* MW 7 */
+ 13538 "01101101" // /* MW 6 */
+ 13539 "10001100" // /* MW 5 */
+ 13540 "01000110" // /* MW 4 */
+ 13541 "00000111" // /* MW 3 */
+ 13542 "00011100" // /* MW 2 */
+ 13543 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13545 "01000001" // /* MW 7 */
+ 13546 "00000011" // /* MW 6 */
+ 13547 "10001001" // /* MW 5 */
+ 13548 "11000110" // /* MW 4 */
+ 13549 "10000010" // /* MW 3 */
+ 13550 "00110000" // /* MW 2 */
+ 13551 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13553 "01101110" // /* MW 9 */
+ 13554 "10000001" // /* MW 8 */
+ 13555 "10000100" // /* MW 7 */
+ 13556 "00000010" // /* MW 6 */
+ 13557 "11110100" // /* MW 5 */
+ 13558 "11110000" // /* MW 4 */
+ 13559 "01110001" // /* MW 3 */
+ 13560 "10110011" // /* MW 2 */
+ 13561 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13563 "00000001" // /* MW 9 */
+ 13564 "10001001" // /* MW 8 */
+ 13565 "10001010" // /* MW 7 */
+ 13566 "01000110" // /* MW 6 */
+ 13567 "00001011" // /* MW 5 */
+ 13568 "10011100" // /* MW 4 */
+ 13569 "11101010" // /* MW 3 */
+ 13570 "00111000" // /* MW 2 */
+ 13571 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13573 "00000001" // /* MW 9 */
+ 13574 "00110101" // /* MW 8 */
+ 13575 "10001001" // /* MW 7 */
+ 13576 "11000110" // /* MW 6 */
+ 13577 "10000110" // /* MW 5 */
+ 13578 "00110000" // /* MW 4 */
+ 13579 "01101010" // /* MW 3 */
+ 13580 "10110001" // /* MW 2 */
+ 13581 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13583 "00000110" // /* MW 3 */
+ 13584 "10001001" // /* MW 2 */
+ 13585 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13587 "10100001" // /* MW 7 */
+ 13588 "01001000" // /* MW 6 */
+ 13589 "10001100" // /* MW 5 */
+ 13590 "11000110" // /* MW 4 */
+ 13591 "10001110" // /* MW 3 */
+ 13592 "10110000" // /* MW 2 */
+ 13593 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13595 "10100001" // /* MW 7 */
+ 13596 "00110110" // /* MW 6 */
+ 13597 "10001010" // /* MW 5 */
+ 13598 "01000110" // /* MW 4 */
+ 13599 "00001111" // /* MW 3 */
+ 13600 "10011100" // /* MW 2 */
+ 13601 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13603 "00001110" // /* MW 3 */
+ 13604 "10001001" // /* MW 2 */
+ 13605 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13607 "11100001" // /* MW 7 */
+ 13608 "10010010" // /* MW 6 */
+ 13609 "10001011" // /* MW 5 */
+ 13610 "01000110" // /* MW 4 */
+ 13611 "00000011" // /* MW 3 */
+ 13612 "00011100" // /* MW 2 */
+ 13613 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13615 "11100001" // /* MW 7 */
+ 13616 "01010110" // /* MW 6 */
+ 13617 "10001000" // /* MW 5 */
+ 13618 "01000110" // /* MW 4 */
+ 13619 "00000111" // /* MW 3 */
+ 13620 "00011100" // /* MW 2 */
+ 13621 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13623 "00000101" // /* MW 5 */
+ 13624 "01100001" // /* MW 4 */
+ 13625 "11110100" // /* MW 3 */
+ 13626 "00101100" // /* MW 2 */
+ 13627 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13629 "01000001" // /* MW 3 */
+ 13630 "01101101" // /* MW 2 */
+ 13631 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13633 "00011010" // /* MW 15 */
+ 13634 "01001000" // /* MW 14 */
+ 13635 "01111100" // /* MW 13 */
+ 13636 "10100101" // /* MW 12 */
+ 13637 "00000001" // /* MW 11 */
+ 13638 "00000000" // /* MW 10 */
+ 13639 "00000000" // /* MW 9 */
+ 13640 "00000000" // /* MW 8 */
+ 13641 "01011011" // /* MW 7 */
+ 13642 "00000001" // /* MW 6 */
+ 13643 "00100000" // /* MW 5 */
+ 13644 "00000000" // /* MW 4 */
+ 13645 "11110000" // /* MW 3 */
+ 13646 "00101100" // /* MW 2 */
+ 13647 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13649 "01101000" // /* MW 11 */
+ 13650 "10000001" // /* MW 10 */
+ 13651 "10000100" // /* MW 9 */
+ 13652 "00000010" // /* MW 8 */
+ 13653 "00100111" // /* MW 7 */
+ 13654 "00000100" // /* MW 6 */
+ 13655 "00100000" // /* MW 5 */
+ 13656 "11100111" // /* MW 4 */
+ 13657 "11111000" // /* MW 3 */
+ 13658 "00001100" // /* MW 2 */
+ 13659 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13661 "00000001" // /* MW 7 */
+ 13662 "10001001" // /* MW 6 */
+ 13663 "10001010" // /* MW 5 */
+ 13664 "01000110" // /* MW 4 */
+ 13665 "00001011" // /* MW 3 */
+ 13666 "10011100" // /* MW 2 */
+ 13667 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13669 "00000001" // /* MW 7 */
+ 13670 "00110101" // /* MW 6 */
+ 13671 "10001001" // /* MW 5 */
+ 13672 "11000110" // /* MW 4 */
+ 13673 "10000110" // /* MW 3 */
+ 13674 "00110000" // /* MW 2 */
+ 13675 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00000110" // /* MW 3 */
+ 13678 "10001001" // /* MW 2 */
+ 13679 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13681 "10100001" // /* MW 7 */
+ 13682 "01001000" // /* MW 6 */
+ 13683 "10001100" // /* MW 5 */
+ 13684 "01000110" // /* MW 4 */
+ 13685 "00001111" // /* MW 3 */
+ 13686 "10011100" // /* MW 2 */
+ 13687 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13689 "10100001" // /* MW 7 */
+ 13690 "00110110" // /* MW 6 */
+ 13691 "10001010" // /* MW 5 */
+ 13692 "11000110" // /* MW 4 */
+ 13693 "10001110" // /* MW 3 */
+ 13694 "10110000" // /* MW 2 */
+ 13695 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "00001110" // /* MW 3 */
+ 13698 "10001001" // /* MW 2 */
+ 13699 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13701 "11100001" // /* MW 3 */
+ 13702 "10010010" // /* MW 2 */
+ 13703 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13705 "11100001" // /* MW 3 */
+ 13706 "01010110" // /* MW 2 */
+ 13707 "10001000" // /* MW 1 */
+ 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13709 "00000000" // /* MW 1 */
+ 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13711 "00000000" // /* MW 1 */
+ 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13713 "00000000" // /* MW 1 */
+ 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13715 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13717 "10010110" // /* MW 3 */
+ 13718 "00010001" // /* MW 2 */
+ 13719 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13721 "00000000" // /* MW 5 */
+ 13722 "01010000" // /* MW 4 */
+ 13723 "11000000" // /* MW 3 */
+ 13724 "00000010" // /* MW 2 */
+ 13725 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13727 "01101100" // /* MW 3 */
+ 13728 "01010000" // /* MW 2 */
+ 13729 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13731 "00010100" // /* MW 3 */
+ 13732 "01010011" // /* MW 2 */
+ 13733 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13735 "01101100" // /* MW 3 */
+ 13736 "01010000" // /* MW 2 */
+ 13737 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13739 "00010011" // /* MW 3 */
+ 13740 "10001010" // /* MW 2 */
+ 13741 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13743 "10010011" // /* MW 3 */
+ 13744 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13745 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13761 "10000000" // /* MW 5 */
+ 13762 "11001000" // /* MW 4 */
+ 13763 "11001000" // /* MW 3 */
+ 13764 "00000111" // /* MW 2 */
+ 13765 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13767 "01000001" // /* MW 5 */
+ 13768 "00101111" // /* MW 4 */
+ 13769 "11010000" // /* MW 3 */
+ 13770 "11000010" // /* MW 2 */
+ 13771 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13773 "00000001" // /* MW 5 */
+ 13774 "00000000" // /* MW 4 */
+ 13775 "00000000" // /* MW 3 */
+ 13776 "00010000" // /* MW 2 */
+ 13777 "00000000" // /* MW 1 */
+ 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13779 "01110000" // /* MW 7 */
+ 13780 "01110000" // /* MW 6 */
+ 13781 "00101101" // /* MW 5 */
+ 13782 "00000010" // /* MW 4 */
+ 13783 "10110000" // /* MW 3 */
+ 13784 "00111010" // /* MW 2 */
+ 13785 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13787 "01110000" // /* MW 7 */
+ 13788 "11110000" // /* MW 6 */
+ 13789 "10101000" // /* MW 5 */
+ 13790 "00000001" // /* MW 4 */
+ 13791 "10110000" // /* MW 3 */
+ 13792 "10110110" // /* MW 2 */
+ 13793 "11111111" // /* MW 1 */
+ 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "00011101" // /* MW 3 */
+ 13796 "11101100" // /* MW 2 */
+ 13797 "00001111" // /* MW 1 */
+ 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13799 "10011101" // /* MW 3 */
+ 13800 "11110111" // /* MW 2 */
+ 13801 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13803 "01110000" // /* MW 7 */
+ 13804 "01100000" // /* MW 6 */
+ 13805 "11001010" // /* MW 5 */
+ 13806 "00000001" // /* MW 4 */
+ 13807 "10110000" // /* MW 3 */
+ 13808 "00000010" // /* MW 2 */
+ 13809 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */
+ 13811 "00000001" // /* MW 5 */
+ 13812 "01000000" // /* MW 4 */
+ 13813 "00111000" // /* MW 3 */
+ 13814 "00011011" // /* MW 2 */
+ 13815 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13817 "11000000" // /* MW 3 */
+ 13818 "11010110" // /* MW 2 */
+ 13819 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13821 "10010000" // /* MW 3 */
+ 13822 "01100010" // /* MW 2 */
+ 13823 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13825 "11111011" // /* MW 3 */
+ 13826 "01100011" // /* MW 2 */
+ 13827 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13829 "10100000" // /* MW 5 */
+ 13830 "11001000" // /* MW 4 */
+ 13831 "11000110" // /* MW 3 */
+ 13832 "00000111" // /* MW 2 */
+ 13833 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13835 "00110001" // /* MW 3 */
+ 13836 "00000110" // /* MW 2 */
+ 13837 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13839 "00010001" // /* MW 9 */
+ 13840 "00110100" // /* MW 8 */
+ 13841 "10110010" // /* MW 7 */
+ 13842 "11110000" // /* MW 6 */
+ 13843 "00000001" // /* MW 5 */
+ 13844 "00000000" // /* MW 4 */
+ 13845 "01100000" // /* MW 3 */
+ 13846 "10010001" // /* MW 2 */
+ 13847 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13849 "00010000" // /* MW 11 */
+ 13850 "00110010" // /* MW 10 */
+ 13851 "10110010" // /* MW 9 */
+ 13852 "11110000" // /* MW 8 */
+ 13853 "00000001" // /* MW 7 */
+ 13854 "00000000" // /* MW 6 */
+ 13855 "10001011" // /* MW 5 */
+ 13856 "10001000" // /* MW 4 */
+ 13857 "11100000" // /* MW 3 */
+ 13858 "11000000" // /* MW 2 */
+ 13859 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13861 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */
+ 13863 "00000001" // /* MW 5 */
+ 13864 "00000000" // /* MW 4 */
+ 13865 "00100000" // /* MW 3 */
+ 13866 "00011000" // /* MW 2 */
+ 13867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13873 "00110001" // /* MW 3 */
+ 13874 "00100000" // /* MW 2 */
+ 13875 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "00000101" // /* MW 3 */
+ 13878 "00100000" // /* MW 2 */
+ 13879 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13881 "01110000" // /* MW 7 */
+ 13882 "10100101" // /* MW 6 */
+ 13883 "00000001" // /* MW 5 */
+ 13884 "00000000" // /* MW 4 */
+ 13885 "00110000" // /* MW 3 */
+ 13886 "11000010" // /* MW 2 */
+ 13887 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13889 "00000000" // /* MW 7 */
+ 13890 "10000010" // /* MW 6 */
+ 13891 "00110011" // /* MW 5 */
+ 13892 "00000001" // /* MW 4 */
+ 13893 "01100000" // /* MW 3 */
+ 13894 "10010001" // /* MW 2 */
+ 13895 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13897 "00111010" // /* MW 3 */
+ 13898 "00000110" // /* MW 2 */
+ 13899 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13901 "00010000" // /* MW 9 */
+ 13902 "00110000" // /* MW 8 */
+ 13903 "00110010" // /* MW 7 */
+ 13904 "11110001" // /* MW 6 */
+ 13905 "00000001" // /* MW 5 */
+ 13906 "00000000" // /* MW 4 */
+ 13907 "01010000" // /* MW 3 */
+ 13908 "11000011" // /* MW 2 */
+ 13909 "01000100" // /* MW 1 */
+ 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13911 "00000000" // /* MW 1 */
+ 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */
+ 13913 "00000000" // /* MW 5 */
+ 13914 "00000000" // /* MW 4 */
+ 13915 "01000000" // /* MW 3 */
+ 13916 "00011011" // /* MW 2 */
+ 13917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13919 "10110000" // /* MW 5 */
+ 13920 "11001000" // /* MW 4 */
+ 13921 "11000110" // /* MW 3 */
+ 13922 "00000111" // /* MW 2 */
+ 13923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13927 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00110001" // /* MW 3 */
+ 13930 "00000110" // /* MW 2 */
+ 13931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13933 "00010001" // /* MW 3 */
+ 13934 "00000110" // /* MW 2 */
+ 13935 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13937 "00000000" // /* MW 15 */
+ 13938 "00000000" // /* MW 14 */
+ 13939 "00010000" // /* MW 13 */
+ 13940 "00101100" // /* MW 12 */
+ 13941 "10110010" // /* MW 11 */
+ 13942 "11110001" // /* MW 10 */
+ 13943 "00000001" // /* MW 9 */
+ 13944 "00000000" // /* MW 8 */
+ 13945 "01011011" // /* MW 7 */
+ 13946 "00000001" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13953 "10000110" // /* MW 3 */
+ 13954 "01100111" // /* MW 2 */
+ 13955 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13957 "00010000" // /* MW 9 */
+ 13958 "00101000" // /* MW 8 */
+ 13959 "00110010" // /* MW 7 */
+ 13960 "11110010" // /* MW 6 */
+ 13961 "00000001" // /* MW 5 */
+ 13962 "00000000" // /* MW 4 */
+ 13963 "11010000" // /* MW 3 */
+ 13964 "11101110" // /* MW 2 */
+ 13965 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13967 "00010110" // /* MW 3 */
+ 13968 "11111110" // /* MW 2 */
+ 13969 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13971 "00110110" // /* MW 3 */
+ 13972 "11111110" // /* MW 2 */
+ 13973 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13975 "01010110" // /* MW 3 */
+ 13976 "01000110" // /* MW 2 */
+ 13977 "00000010" // /* MW 1 */
+ 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13979 "00000000" // /* MW 1 */
+ 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13981 "00000000" // /* MW 1 */
+ 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13983 "00000000" // /* MW 1 */
+ 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13985 "00000000" // /* MW 1 */
+ 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13987 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00000010" // /* MW 3 */
+ 13990 "01100001" // /* MW 2 */
+ 13991 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "00010001" // /* MW 3 */
+ 13994 "00000110" // /* MW 2 */
+ 13995 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "11111101" // /* MW 3 */
+ 13998 "11100000" // /* MW 2 */
+ 13999 "00010111" // /* MW 1 */
+ 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14001 "00000000" // /* MW 1 */
+ 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14003 "00000000" // /* MW 1 */
+ 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14005 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14007 "00001000" // /* MW 3 */
+ 14008 "10010011" // /* MW 2 */
+ 14009 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14011 "00010000" // /* MW 9 */
+ 14012 "00100000" // /* MW 8 */
+ 14013 "10110010" // /* MW 7 */
+ 14014 "11110011" // /* MW 6 */
+ 14015 "00000001" // /* MW 5 */
+ 14016 "00000000" // /* MW 4 */
+ 14017 "00000000" // /* MW 3 */
+ 14018 "00101111" // /* MW 2 */
+ 14019 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14021 "11000001" // /* MW 5 */
+ 14022 "00101011" // /* MW 4 */
+ 14023 "00101000" // /* MW 3 */
+ 14024 "00000000" // /* MW 2 */
+ 14025 "00000110" // /* MW 1 */
+ 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14027 "01011010" // /* MW 3 */
+ 14028 "01101000" // /* MW 2 */
+ 14029 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14031 "10000001" // /* MW 5 */
+ 14032 "00101001" // /* MW 4 */
+ 14033 "00100111" // /* MW 3 */
+ 14034 "11010011" // /* MW 2 */
+ 14035 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00110110" // /* MW 3 */
+ 14038 "00000110" // /* MW 2 */
+ 14039 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14041 "00010000" // /* MW 9 */
+ 14042 "11100000" // /* MW 8 */
+ 14043 "10110011" // /* MW 7 */
+ 14044 "11110001" // /* MW 6 */
+ 14045 "00000001" // /* MW 5 */
+ 14046 "00000000" // /* MW 4 */
+ 14047 "11010000" // /* MW 3 */
+ 14048 "11000010" // /* MW 2 */
+ 14049 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14051 "01010110" // /* MW 3 */
+ 14052 "00000110" // /* MW 2 */
+ 14053 "00000111" // /* MW 1 */
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14061 "01110110" // /* MW 3 */
+ 14062 "00000110" // /* MW 2 */
+ 14063 "00000101" // /* MW 1 */
+ 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14067 "00001111" // /* MW 3 */
+ 14068 "01100001" // /* MW 2 */
+ 14069 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14071 "00000111" // /* MW 3 */
+ 14072 "10100010" // /* MW 2 */
+ 14073 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14075 "11111101" // /* MW 3 */
+ 14076 "00100000" // /* MW 2 */
+ 14077 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */
+ 14079 "00000001" // /* MW 5 */
+ 14080 "00000000" // /* MW 4 */
+ 14081 "01110000" // /* MW 3 */
+ 14082 "00011001" // /* MW 2 */
+ 14083 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14085 "00110001" // /* MW 3 */
+ 14086 "00000110" // /* MW 2 */
+ 14087 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14089 "11000001" // /* MW 3 */
+ 14090 "01001001" // /* MW 2 */
+ 14091 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14093 "00100101" // /* MW 3 */
+ 14094 "10110100" // /* MW 2 */
+ 14095 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "00010101" // /* MW 3 */
+ 14098 "10111011" // /* MW 2 */
+ 14099 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14101 "11000001" // /* MW 11 */
+ 14102 "10001010" // /* MW 10 */
+ 14103 "11011111" // /* MW 9 */
+ 14104 "00000011" // /* MW 8 */
+ 14105 "00000000" // /* MW 7 */
+ 14106 "00000000" // /* MW 6 */
+ 14107 "00100000" // /* MW 5 */
+ 14108 "00000000" // /* MW 4 */
+ 14109 "11110000" // /* MW 3 */
+ 14110 "00101100" // /* MW 2 */
+ 14111 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14113 "00001010" // /* MW 3 */
+ 14114 "01100111" // /* MW 2 */
+ 14115 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14117 "00010110" // /* MW 3 */
+ 14118 "00000110" // /* MW 2 */
+ 14119 "00000010" // /* MW 1 */
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14127 "00000000" // /* MW 1 */
+ 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14129 "00000000" // /* MW 1 */
+ 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14131 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "11111000" // /* MW 3 */
+ 14134 "00010000" // /* MW 2 */
+ 14135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14137 "00010000" // /* MW 9 */
+ 14138 "00110000" // /* MW 8 */
+ 14139 "10110010" // /* MW 7 */
+ 14140 "11110000" // /* MW 6 */
+ 14141 "00000001" // /* MW 5 */
+ 14142 "00000000" // /* MW 4 */
+ 14143 "11010000" // /* MW 3 */
+ 14144 "11000010" // /* MW 2 */
+ 14145 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14147 "01010110" // /* MW 3 */
+ 14148 "00000110" // /* MW 2 */
+ 14149 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14151 "00110110" // /* MW 3 */
+ 14152 "00000110" // /* MW 2 */
+ 14153 "00000111" // /* MW 1 */
+ 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14155 "10011001" // /* MW 3 */
+ 14156 "11110100" // /* MW 2 */
+ 14157 "00000111" // /* MW 1 */
+ 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14159 "11010001" // /* MW 3 */
+ 14160 "11111001" // /* MW 2 */
+ 14161 "00000111" // /* MW 1 */
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14163 "00000000" // /* MW 1 */
+ 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14165 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14167 "00000001" // /* MW 3 */
+ 14168 "11100001" // /* MW 2 */
+ 14169 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14171 "00010001" // /* MW 3 */
+ 14172 "11100110" // /* MW 2 */
+ 14173 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14175 "00101000" // /* MW 3 */
+ 14176 "01100001" // /* MW 2 */
+ 14177 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */
+ 14179 "00000001" // /* MW 5 */
+ 14180 "01000000" // /* MW 4 */
+ 14181 "11000000" // /* MW 3 */
+ 14182 "00011011" // /* MW 2 */
+ 14183 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "00000001" // /* MW 3 */
+ 14186 "00110000" // /* MW 2 */
+ 14187 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14193 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14195 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14197 "11000001" // /* MW 11 */
+ 14198 "10001000" // /* MW 10 */
+ 14199 "10000011" // /* MW 9 */
+ 14200 "00000011" // /* MW 8 */
+ 14201 "00000000" // /* MW 7 */
+ 14202 "00000000" // /* MW 6 */
+ 14203 "00100000" // /* MW 5 */
+ 14204 "00000000" // /* MW 4 */
+ 14205 "11110000" // /* MW 3 */
+ 14206 "00101100" // /* MW 2 */
+ 14207 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14209 "01000001" // /* MW 5 */
+ 14210 "11101101" // /* MW 4 */
+ 14211 "00101110" // /* MW 3 */
+ 14212 "10110110" // /* MW 2 */
+ 14213 "11111111" // /* MW 1 */
+ 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14215 "11110001" // /* MW 3 */
+ 14216 "11110001" // /* MW 2 */
+ 14217 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14219 "00000000" // /* MW 3 */
+ 14220 "00101000" // /* MW 2 */
+ 14221 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14223 "00000001" // /* MW 5 */
+ 14224 "00000000" // /* MW 4 */
+ 14225 "00000000" // /* MW 3 */
+ 14226 "11110000" // /* MW 2 */
+ 14227 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14233 "00000000" // /* MW 1 */
+.delay_slot
+ 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "11000000" // /* MW 3 */
+ 14236 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 14237 "00011111" // /* MW 1 */
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.src_ref 7 "superkernels.cpp" 578
+.src_ref 7 "superkernels.cpp" 578 first
+.function_start
+ 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14241 "00000001" // /* MW 5 */
+ 14242 "00000000" // /* MW 4 */
+ 14243 "00000000" // /* MW 3 */
+ 14244 "00001000" // /* MW 2 */
+ 14245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+ 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14247 "00010001" // /* MW 9 */
+ 14248 "00100000" // /* MW 8 */
+ 14249 "10110010" // /* MW 7 */
+ 14250 "11110011" // /* MW 6 */
+ 14251 "00000001" // /* MW 5 */
+ 14252 "00000000" // /* MW 4 */
+ 14253 "10110000" // /* MW 3 */
+ 14254 "01110011" // /* MW 2 */
+ 14255 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6 first
+ 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14257 "01110010" // /* MW 9 */
+ 14258 "01110000" // /* MW 8 */
+ 14259 "00101101" // /* MW 7 */
+ 14260 "10000010" // /* MW 6 */
+ 14261 "00011101" // /* MW 5 */
+ 14262 "11111111" // /* MW 4 */
+ 14263 "11010111" // /* MW 3 */
+ 14264 "11000010" // /* MW 2 */
+ 14265 "11100000" // /* MW 1 */
+ 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011101" // /* MW 3 */
+ 14268 "11110110" // /* MW 2 */
+ 14269 "00001111" // /* MW 1 */
+ 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011101" // /* MW 3 */
+ 14272 "11110001" // /* MW 2 */
+ 14273 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 599 105
+.src_ref 7 "superkernels.cpp" 629 34
+ 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 14275 "01110000" // /* MW 7 */
+ 14276 "01100000" // /* MW 6 */
+ 14277 "10110011" // /* MW 5 */
+ 14278 "00000011" // /* MW 4 */
+ 14279 "10110000" // /* MW 3 */
+ 14280 "10000111" // /* MW 2 */
+ 14281 "11111101" // /* MW 1 */
+ 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14283 "00000000" // /* MW 1 */
+ 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14285 "00000000" // /* MW 1 */
+ 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+.src_ref 7 "superkernels.cpp" 583 16
+ 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */
+ 14289 "00000001" // /* MW 5 */
+ 14290 "01000000" // /* MW 4 */
+ 14291 "10110000" // /* MW 3 */
+ 14292 "00011100" // /* MW 2 */
+ 14293 "10000000" // /* MW 1 */
+.delay_slot
+ 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011101" // /* MW 3 */
+ 14296 "11101000" // /* MW 2 */
+ 14297 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 22 first
+.delay_slot
+ 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "10010000" // /* MW 3 */
+ 14300 "01100010" // /* MW 2 */
+ 14301 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 30
+.delay_slot
+ 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "11111011" // /* MW 3 */
+ 14304 "01100011" // /* MW 2 */
+ 14305 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14307 "10100000" // /* MW 5 */
+ 14308 "11001000" // /* MW 4 */
+ 14309 "11001100" // /* MW 3 */
+ 14310 "00000111" // /* MW 2 */
+ 14311 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14313 "00110001" // /* MW 3 */
+ 14314 "00000110" // /* MW 2 */
+ 14315 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14317 "00010000" // /* MW 9 */
+ 14318 "00110100" // /* MW 8 */
+ 14319 "00110010" // /* MW 7 */
+ 14320 "11110011" // /* MW 6 */
+ 14321 "00000001" // /* MW 5 */
+ 14322 "00000000" // /* MW 4 */
+ 14323 "00000000" // /* MW 3 */
+ 14324 "00100000" // /* MW 2 */
+ 14325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14327 "00010000" // /* MW 11 */
+ 14328 "00110010" // /* MW 10 */
+ 14329 "00110010" // /* MW 9 */
+ 14330 "11110000" // /* MW 8 */
+ 14331 "00000001" // /* MW 7 */
+ 14332 "00000000" // /* MW 6 */
+ 14333 "10001011" // /* MW 5 */
+ 14334 "10000100" // /* MW 4 */
+ 14335 "11100110" // /* MW 3 */
+ 14336 "11000000" // /* MW 2 */
+ 14337 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 587 4
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14339 "00010000" // /* MW 9 */
+ 14340 "00000000" // /* MW 8 */
+ 14341 "10110011" // /* MW 7 */
+ 14342 "11110000" // /* MW 6 */
+ 14343 "00000001" // /* MW 5 */
+ 14344 "00000000" // /* MW 4 */
+ 14345 "00000000" // /* MW 3 */
+ 14346 "00000001" // /* MW 2 */
+ 14347 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 14349 "00000001" // /* MW 5 */
+ 14350 "00000000" // /* MW 4 */
+ 14351 "01100000" // /* MW 3 */
+ 14352 "00000101" // /* MW 2 */
+ 14353 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14357 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00110001" // /* MW 3 */
+ 14360 "00100000" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14363 "00001010" // /* MW 5 */
+ 14364 "01000000" // /* MW 4 */
+ 14365 "11110000" // /* MW 3 */
+ 14366 "00101100" // /* MW 2 */
+ 14367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14369 "00000000" // /* MW 15 */
+ 14370 "00000000" // /* MW 14 */
+ 14371 "01111000" // /* MW 13 */
+ 14372 "01100000" // /* MW 12 */
+ 14373 "00110111" // /* MW 11 */
+ 14374 "00000000" // /* MW 10 */
+ 14375 "00000000" // /* MW 9 */
+ 14376 "10000000" // /* MW 8 */
+ 14377 "00010001" // /* MW 7 */
+ 14378 "00000110" // /* MW 6 */
+ 14379 "00100000" // /* MW 5 */
+ 14380 "00000000" // /* MW 4 */
+ 14381 "11110000" // /* MW 3 */
+ 14382 "00101100" // /* MW 2 */
+ 14383 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 591 4
+.return_address
+ 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14385 "00000001" // /* MW 5 */
+ 14386 "00000001" // /* MW 4 */
+ 14387 "10100001" // /* MW 3 */
+ 14388 "00000000" // /* MW 2 */
+ 14389 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35 first
+ 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14391 "01010110" // /* MW 3 */
+ 14392 "00000010" // /* MW 2 */
+ 14393 "00000111" // /* MW 1 */
+ 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14395 "00000000" // /* MW 1 */
+ 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14397 "00000000" // /* MW 1 */
+ 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14399 "00000000" // /* MW 1 */
+ 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14401 "00000000" // /* MW 1 */
+ 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14403 "00000000" // /* MW 1 */
+ 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14405 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14407 "00000111" // /* MW 3 */
+ 14408 "10100001" // /* MW 2 */
+ 14409 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4
+ 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */
+ 14411 "00000001" // /* MW 5 */
+ 14412 "01000000" // /* MW 4 */
+ 14413 "01101000" // /* MW 3 */
+ 14414 "00011100" // /* MW 2 */
+ 14415 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105
+.delay_slot
+ 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14417 "11000000" // /* MW 3 */
+ 14418 "01011110" // /* MW 2 */
+ 14419 "00011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105 first
+.delay_slot
+ 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14421 "10010000" // /* MW 3 */
+ 14422 "11001000" // /* MW 2 */
+ 14423 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14427 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14429 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */
+ 14431 "00000001" // /* MW 5 */
+ 14432 "01000000" // /* MW 4 */
+ 14433 "01011000" // /* MW 3 */
+ 14434 "00011100" // /* MW 2 */
+ 14435 "10010000" // /* MW 1 */
+.delay_slot
+ 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14437 "00000000" // /* MW 5 */
+ 14438 "00101100" // /* MW 4 */
+ 14439 "11001000" // /* MW 3 */
+ 14440 "00000111" // /* MW 2 */
+ 14441 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27
+.delay_slot
+ 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14443 "00000001" // /* MW 3 */
+ 14444 "00100010" // /* MW 2 */
+ 14445 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14451 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8 first
+.no_stack_arguments
+ 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */
+ 14453 "01000001" // /* MW 9 */
+ 14454 "00000000" // /* MW 8 */
+ 14455 "00000000" // /* MW 7 */
+ 14456 "01110000" // /* MW 6 */
+ 14457 "00000101" // /* MW 5 */
+ 14458 "00000000" // /* MW 4 */
+ 14459 "10110000" // /* MW 3 */
+ 14460 "11100011" // /* MW 2 */
+ 14461 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38
+.delay_slot
+ 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14463 "10000000" // /* MW 5 */
+ 14464 "11001010" // /* MW 4 */
+ 14465 "11001100" // /* MW 3 */
+ 14466 "00000111" // /* MW 2 */
+ 14467 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14469 "10000000" // /* MW 5 */
+ 14470 "11001010" // /* MW 4 */
+ 14471 "11000000" // /* MW 3 */
+ 14472 "00000111" // /* MW 2 */
+ 14473 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14475 "10000000" // /* MW 3 */
+ 14476 "01100001" // /* MW 2 */
+ 14477 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14481 "00000000" // /* MW 15 */
+ 14482 "00000000" // /* MW 14 */
+ 14483 "01111000" // /* MW 13 */
+ 14484 "10100101" // /* MW 12 */
+ 14485 "00000001" // /* MW 11 */
+ 14486 "00000000" // /* MW 10 */
+ 14487 "00000000" // /* MW 9 */
+ 14488 "00000000" // /* MW 8 */
+ 14489 "01011011" // /* MW 7 */
+ 14490 "00000001" // /* MW 6 */
+ 14491 "00100000" // /* MW 5 */
+ 14492 "00000000" // /* MW 4 */
+ 14493 "11110000" // /* MW 3 */
+ 14494 "00101100" // /* MW 2 */
+ 14495 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38 first
+.return_address
+ 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14497 "00010000" // /* MW 9 */
+ 14498 "00000000" // /* MW 8 */
+ 14499 "00001011" // /* MW 7 */
+ 14500 "11110010" // /* MW 6 */
+ 14501 "00000001" // /* MW 5 */
+ 14502 "00000000" // /* MW 4 */
+ 14503 "11010000" // /* MW 3 */
+ 14504 "11000110" // /* MW 2 */
+ 14505 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14507 "00100000" // /* MW 5 */
+ 14508 "00000000" // /* MW 4 */
+ 14509 "00100000" // /* MW 3 */
+ 14510 "11100011" // /* MW 2 */
+ 14511 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14513 "00000000" // /* MW 1 */
+ 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */
+ 14515 "00000000" // /* MW 5 */
+ 14516 "00000000" // /* MW 4 */
+ 14517 "10000000" // /* MW 3 */
+ 14518 "00011100" // /* MW 2 */
+ 14519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14527 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14529 "00000000" // /* MW 15 */
+ 14530 "00000000" // /* MW 14 */
+ 14531 "01111000" // /* MW 13 */
+ 14532 "01100000" // /* MW 12 */
+ 14533 "10110110" // /* MW 11 */
+ 14534 "00000000" // /* MW 10 */
+ 14535 "00000000" // /* MW 9 */
+ 14536 "00000000" // /* MW 8 */
+ 14537 "01011011" // /* MW 7 */
+ 14538 "00000001" // /* MW 6 */
+ 14539 "00100000" // /* MW 5 */
+ 14540 "00000000" // /* MW 4 */
+ 14541 "11110000" // /* MW 3 */
+ 14542 "00101100" // /* MW 2 */
+ 14543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.src_ref 7 "superkernels.cpp" 599 8 first
+.no_stack_arguments
+ 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 14545 "01000001" // /* MW 9 */
+ 14546 "00000000" // /* MW 8 */
+ 14547 "00000000" // /* MW 7 */
+ 14548 "10000100" // /* MW 6 */
+ 14549 "00000101" // /* MW 5 */
+ 14550 "00000000" // /* MW 4 */
+ 14551 "10110000" // /* MW 3 */
+ 14552 "11100011" // /* MW 2 */
+ 14553 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38
+.delay_slot
+ 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14555 "00000000" // /* MW 5 */
+ 14556 "11001011" // /* MW 4 */
+ 14557 "11001100" // /* MW 3 */
+ 14558 "00000111" // /* MW 2 */
+ 14559 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14561 "00000000" // /* MW 5 */
+ 14562 "11001011" // /* MW 4 */
+ 14563 "11000000" // /* MW 3 */
+ 14564 "00000111" // /* MW 2 */
+ 14565 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14567 "10000000" // /* MW 3 */
+ 14568 "01100001" // /* MW 2 */
+ 14569 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14573 "01100111" // /* MW 3 */
+ 14574 "00000001" // /* MW 2 */
+ 14575 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38 first
+.return_address
+ 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14577 "00010000" // /* MW 9 */
+ 14578 "00000000" // /* MW 8 */
+ 14579 "00001011" // /* MW 7 */
+ 14580 "11110010" // /* MW 6 */
+ 14581 "00000001" // /* MW 5 */
+ 14582 "00000000" // /* MW 4 */
+ 14583 "11010000" // /* MW 3 */
+ 14584 "11000110" // /* MW 2 */
+ 14585 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14587 "00100000" // /* MW 5 */
+ 14588 "00000000" // /* MW 4 */
+ 14589 "00100000" // /* MW 3 */
+ 14590 "10010011" // /* MW 2 */
+ 14591 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14593 "00000101" // /* MW 3 */
+ 14594 "01101000" // /* MW 2 */
+ 14595 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 35 first
+.src_ref 7 "superkernels.cpp" 611 18
+ 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14597 "00010000" // /* MW 9 */
+ 14598 "00101000" // /* MW 8 */
+ 14599 "00110010" // /* MW 7 */
+ 14600 "11110011" // /* MW 6 */
+ 14601 "00000001" // /* MW 5 */
+ 14602 "00000000" // /* MW 4 */
+ 14603 "01010000" // /* MW 3 */
+ 14604 "11001101" // /* MW 2 */
+ 14605 "01101111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 18 first
+ 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14607 "01010110" // /* MW 3 */
+ 14608 "00000110" // /* MW 2 */
+ 14609 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 37 first
+ 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14611 "10111010" // /* MW 3 */
+ 14612 "00011110" // /* MW 2 */
+ 14613 "00000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 73
+ 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14615 "00011010" // /* MW 3 */
+ 14616 "00000110" // /* MW 2 */
+ 14617 "00000011" // /* MW 1 */
+ 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14619 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 110
+ 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14621 "10011010" // /* MW 3 */
+ 14622 "00010110" // /* MW 2 */
+ 14623 "00000011" // /* MW 1 */
+ 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14625 "00000000" // /* MW 1 */
+ 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14627 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14629 "10001000" // /* MW 5 */
+ 14630 "11001000" // /* MW 4 */
+ 14631 "11000000" // /* MW 3 */
+ 14632 "00000111" // /* MW 2 */
+ 14633 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 57
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14635 "01011111" // /* MW 3 */
+ 14636 "11100111" // /* MW 2 */
+ 14637 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19 first
+.src_ref 7 "superkernels.cpp" 611 16
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14639 "00010001" // /* MW 9 */
+ 14640 "00101110" // /* MW 8 */
+ 14641 "00110010" // /* MW 7 */
+ 14642 "11110001" // /* MW 6 */
+ 14643 "00000001" // /* MW 5 */
+ 14644 "00000000" // /* MW 4 */
+ 14645 "00110000" // /* MW 3 */
+ 14646 "11001110" // /* MW 2 */
+ 14647 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 94 first
+ 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14649 "00001111" // /* MW 3 */
+ 14650 "11100001" // /* MW 2 */
+ 14651 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27 first
+ 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14653 "00101111" // /* MW 3 */
+ 14654 "01100011" // /* MW 2 */
+ 14655 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 28 first
+ 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14657 "00001111" // /* MW 3 */
+ 14658 "00100001" // /* MW 2 */
+ 14659 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13
+.src_ref 7 "superkernels.cpp" 611 16 first
+ 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14661 "00010000" // /* MW 11 */
+ 14662 "00110000" // /* MW 10 */
+ 14663 "00110010" // /* MW 9 */
+ 14664 "11110011" // /* MW 8 */
+ 14665 "00000001" // /* MW 7 */
+ 14666 "10000000" // /* MW 6 */
+ 14667 "00110001" // /* MW 5 */
+ 14668 "00000110" // /* MW 4 */
+ 14669 "11110010" // /* MW 3 */
+ 14670 "00101100" // /* MW 2 */
+ 14671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13 first
+ 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14673 "00000000" // /* MW 15 */
+ 14674 "00000000" // /* MW 14 */
+ 14675 "01111000" // /* MW 13 */
+ 14676 "10100101" // /* MW 12 */
+ 14677 "00000001" // /* MW 11 */
+ 14678 "00000000" // /* MW 10 */
+ 14679 "00000000" // /* MW 9 */
+ 14680 "10000000" // /* MW 8 */
+ 14681 "00010001" // /* MW 7 */
+ 14682 "00000110" // /* MW 6 */
+ 14683 "00100110" // /* MW 5 */
+ 14684 "00000000" // /* MW 4 */
+ 14685 "11110000" // /* MW 3 */
+ 14686 "00101100" // /* MW 2 */
+ 14687 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+.src_ref 7 "superkernels.cpp" 614 12
+ 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14689 "10010000" // /* MW 5 */
+ 14690 "11001000" // /* MW 4 */
+ 14691 "11000000" // /* MW 3 */
+ 14692 "00000111" // /* MW 2 */
+ 14693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11
+ 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14695 "00010000" // /* MW 9 */
+ 14696 "00100000" // /* MW 8 */
+ 14697 "00110010" // /* MW 7 */
+ 14698 "11110001" // /* MW 6 */
+ 14699 "00000001" // /* MW 5 */
+ 14700 "00000000" // /* MW 4 */
+ 14701 "11010000" // /* MW 3 */
+ 14702 "11000010" // /* MW 2 */
+ 14703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13
+.src_ref 7 "superkernels.cpp" 616 11 first
+ 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14705 "00010000" // /* MW 9 */
+ 14706 "00100110" // /* MW 8 */
+ 14707 "00110010" // /* MW 7 */
+ 14708 "11110011" // /* MW 6 */
+ 14709 "00000001" // /* MW 5 */
+ 14710 "00000000" // /* MW 4 */
+ 14711 "11010000" // /* MW 3 */
+ 14712 "11000110" // /* MW 2 */
+ 14713 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+ 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14715 "01010110" // /* MW 3 */
+ 14716 "00000110" // /* MW 2 */
+ 14717 "00000110" // /* MW 1 */
+ 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14719 "00000000" // /* MW 1 */
+ 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14721 "00000000" // /* MW 1 */
+ 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14723 "00000000" // /* MW 1 */
+ 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14725 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 618 6 first
+.src_ref 7 "superkernels.cpp" 618 17 first
+ 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */
+ 14727 "00000001" // /* MW 5 */
+ 14728 "01000000" // /* MW 4 */
+ 14729 "11111000" // /* MW 3 */
+ 14730 "00011100" // /* MW 2 */
+ 14731 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14733 "00000001" // /* MW 5 */
+ 14734 "10110000" // /* MW 4 */
+ 14735 "11101001" // /* MW 3 */
+ 14736 "01000000" // /* MW 2 */
+ 14737 "10001100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14739 "00000111" // /* MW 3 */
+ 14740 "10100100" // /* MW 2 */
+ 14741 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14743 "00110001" // /* MW 3 */
+ 14744 "00000110" // /* MW 2 */
+ 14745 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14747 "01010001" // /* MW 3 */
+ 14748 "00000110" // /* MW 2 */
+ 14749 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.delay_slot
+ 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14751 "01110001" // /* MW 3 */
+ 14752 "00000110" // /* MW 2 */
+ 14753 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14755 "00110001" // /* MW 3 */
+ 14756 "11110110" // /* MW 2 */
+ 14757 "00000111" // /* MW 1 */
+ 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14759 "00000000" // /* MW 1 */
+ 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14761 "00000000" // /* MW 1 */
+ 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14763 "00000000" // /* MW 1 */
+ 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14765 "00000000" // /* MW 1 */
+ 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14767 "00000000" // /* MW 1 */
+ 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14771 "10000110" // /* MW 3 */
+ 14772 "01101000" // /* MW 2 */
+ 14773 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14775 "01110110" // /* MW 3 */
+ 14776 "11111111" // /* MW 2 */
+ 14777 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14779 "00110110" // /* MW 3 */
+ 14780 "11111110" // /* MW 2 */
+ 14781 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14783 "01010110" // /* MW 3 */
+ 14784 "11111110" // /* MW 2 */
+ 14785 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14787 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14789 "00110110" // /* MW 3 */
+ 14790 "01000110" // /* MW 2 */
+ 14791 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14793 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14795 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14797 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14799 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14801 "00010010" // /* MW 3 */
+ 14802 "10100011" // /* MW 2 */
+ 14803 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.src_ref 1 "io_buffer_main.h" 395 8
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14805 "11111010" // /* MW 5 */
+ 14806 "11000001" // /* MW 4 */
+ 14807 "00111111" // /* MW 3 */
+ 14808 "11000110" // /* MW 2 */
+ 14809 "11000000" // /* MW 1 */
+ 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14811 "00000000" // /* MW 1 */
+ 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14813 "00000000" // /* MW 1 */
+ 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14815 "00000000" // /* MW 1 */
+ 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14817 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 14819 "01100000" // /* MW 13 */
+ 14820 "00101011" // /* MW 12 */
+ 14821 "00000000" // /* MW 11 */
+ 14822 "10101111" // /* MW 10 */
+ 14823 "00110100" // /* MW 9 */
+ 14824 "00000000" // /* MW 8 */
+ 14825 "00001000" // /* MW 7 */
+ 14826 "01010011" // /* MW 6 */
+ 14827 "00100100" // /* MW 5 */
+ 14828 "00000000" // /* MW 4 */
+ 14829 "11110000" // /* MW 3 */
+ 14830 "00101100" // /* MW 2 */
+ 14831 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14833 "00000000" // /* MW 1 */
+ 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14835 "00000000" // /* MW 1 */
+ 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14837 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 1 "io_buffer_main.h" 125 25
+ 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14839 "00011001" // /* MW 3 */
+ 14840 "11110101" // /* MW 2 */
+ 14841 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14843 "00011001" // /* MW 3 */
+ 14844 "11101000" // /* MW 2 */
+ 14845 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2 first
+.no_stack_arguments
+ 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 14847 "00000001" // /* MW 5 */
+ 14848 "00000000" // /* MW 4 */
+ 14849 "10111000" // /* MW 3 */
+ 14850 "00001000" // /* MW 2 */
+ 14851 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14853 "00000000" // /* MW 5 */
+ 14854 "11001100" // /* MW 4 */
+ 14855 "11000110" // /* MW 3 */
+ 14856 "00000111" // /* MW 2 */
+ 14857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14863 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14865 "00000000" // /* MW 15 */
+ 14866 "00000000" // /* MW 14 */
+ 14867 "01111000" // /* MW 13 */
+ 14868 "10100101" // /* MW 12 */
+ 14869 "00000001" // /* MW 11 */
+ 14870 "00000000" // /* MW 10 */
+ 14871 "00000000" // /* MW 9 */
+ 14872 "00000000" // /* MW 8 */
+ 14873 "10001011" // /* MW 7 */
+ 14874 "10001000" // /* MW 6 */
+ 14875 "00100110" // /* MW 5 */
+ 14876 "00000000" // /* MW 4 */
+ 14877 "11110000" // /* MW 3 */
+ 14878 "00101100" // /* MW 2 */
+ 14879 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+.src_ref 1 "io_buffer_main.h" 218 49
+.return_address
+ 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14881 "00010000" // /* MW 9 */
+ 14882 "00100100" // /* MW 8 */
+ 14883 "10110010" // /* MW 7 */
+ 14884 "11110000" // /* MW 6 */
+ 14885 "00000001" // /* MW 5 */
+ 14886 "00000000" // /* MW 4 */
+ 14887 "00100000" // /* MW 3 */
+ 14888 "01000010" // /* MW 2 */
+ 14889 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6 first
+.src_ref 7 "superkernels.cpp" 623 20
+ 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14891 "00010000" // /* MW 9 */
+ 14892 "00100010" // /* MW 8 */
+ 14893 "10110010" // /* MW 7 */
+ 14894 "11110000" // /* MW 6 */
+ 14895 "00000001" // /* MW 5 */
+ 14896 "00000000" // /* MW 4 */
+ 14897 "11010000" // /* MW 3 */
+ 14898 "11000110" // /* MW 2 */
+ 14899 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 20
+ 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14901 "01010110" // /* MW 3 */
+ 14902 "00000110" // /* MW 2 */
+ 14903 "00000001" // /* MW 1 */
+ 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14905 "00000000" // /* MW 1 */
+ 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14907 "00000000" // /* MW 1 */
+ 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14909 "00000000" // /* MW 1 */
+ 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14911 "00000000" // /* MW 1 */
+ 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14913 "00000000" // /* MW 1 */
+ 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14915 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 17
+ 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14917 "00101000" // /* MW 3 */
+ 14918 "01100011" // /* MW 2 */
+ 14919 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+ 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */
+ 14921 "00000001" // /* MW 5 */
+ 14922 "01000000" // /* MW 4 */
+ 14923 "11010000" // /* MW 3 */
+ 14924 "00011101" // /* MW 2 */
+ 14925 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14935 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14937 "00001000" // /* MW 9 */
+ 14938 "00000011" // /* MW 8 */
+ 14939 "10110100" // /* MW 7 */
+ 14940 "11101000" // /* MW 6 */
+ 14941 "00010111" // /* MW 5 */
+ 14942 "00111111" // /* MW 4 */
+ 14943 "10000000" // /* MW 3 */
+ 14944 "00000010" // /* MW 2 */
+ 14945 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14947 "00010000" // /* MW 9 */
+ 14948 "00101110" // /* MW 8 */
+ 14949 "00110010" // /* MW 7 */
+ 14950 "11110000" // /* MW 6 */
+ 14951 "00000001" // /* MW 5 */
+ 14952 "00000000" // /* MW 4 */
+ 14953 "11010000" // /* MW 3 */
+ 14954 "11101110" // /* MW 2 */
+ 14955 "00111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14957 "01010110" // /* MW 3 */
+ 14958 "11111110" // /* MW 2 */
+ 14959 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14961 "01110110" // /* MW 3 */
+ 14962 "11111110" // /* MW 2 */
+ 14963 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14965 "10010110" // /* MW 3 */
+ 14966 "01010110" // /* MW 2 */
+ 14967 "00000001" // /* MW 1 */
+ 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14969 "00000000" // /* MW 1 */
+ 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14971 "00000000" // /* MW 1 */
+ 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14973 "00000000" // /* MW 1 */
+ 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14975 "00000000" // /* MW 1 */
+ 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14977 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14979 "00100010" // /* MW 3 */
+ 14980 "11100101" // /* MW 2 */
+ 14981 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50
+.src_ref 7 "superkernels.cpp" 630 3
+.src_ref 1 "io_buffer_main.h" 218 20
+ 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14983 "00001010" // /* MW 5 */
+ 14984 "01000000" // /* MW 4 */
+ 14985 "00110000" // /* MW 3 */
+ 14986 "11001010" // /* MW 2 */
+ 14987 "00100000" // /* MW 1 */
+ 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14989 "00000000" // /* MW 1 */
+ 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14991 "00000000" // /* MW 1 */
+ 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14993 "00000000" // /* MW 1 */
+ 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14995 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14997 "00011000" // /* MW 3 */
+ 14998 "00010011" // /* MW 2 */
+ 14999 "00010101" // /* MW 1 */
+ 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15001 "00000000" // /* MW 1 */
+ 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15003 "00000000" // /* MW 1 */
+ 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15005 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52 first
+ 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15007 "01110110" // /* MW 3 */
+ 15008 "00000110" // /* MW 2 */
+ 15009 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34 first
+ 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15011 "01010110" // /* MW 3 */
+ 15012 "00000010" // /* MW 2 */
+ 15013 "00000111" // /* MW 1 */
+ 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15015 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15017 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15019 "00011110" // /* MW 3 */
+ 15020 "01011100" // /* MW 2 */
+ 15021 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15023 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15025 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 32
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15027 "01111000" // /* MW 9 */
+ 15028 "01100000" // /* MW 8 */
+ 15029 "00110001" // /* MW 7 */
+ 15030 "01101100" // /* MW 6 */
+ 15031 "00111000" // /* MW 5 */
+ 15032 "00100111" // /* MW 4 */
+ 15033 "11010000" // /* MW 3 */
+ 15034 "11000110" // /* MW 2 */
+ 15035 "00101001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15037 "00000111" // /* MW 3 */
+ 15038 "10100001" // /* MW 2 */
+ 15039 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */
+ 15041 "00000001" // /* MW 5 */
+ 15042 "01000000" // /* MW 4 */
+ 15043 "10001000" // /* MW 3 */
+ 15044 "00011101" // /* MW 2 */
+ 15045 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15047 "10001011" // /* MW 3 */
+ 15048 "10000000" // /* MW 2 */
+ 15049 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15051 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15055 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.delay_slot
+ 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 15057 "10100000" // /* MW 7 */
+ 15058 "11100010" // /* MW 6 */
+ 15059 "10110100" // /* MW 5 */
+ 15060 "00000000" // /* MW 4 */
+ 15061 "10110000" // /* MW 3 */
+ 15062 "00010011" // /* MW 2 */
+ 15063 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+ 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */
+ 15065 "00000001" // /* MW 5 */
+ 15066 "01000000" // /* MW 4 */
+ 15067 "10011000" // /* MW 3 */
+ 15068 "00011101" // /* MW 2 */
+ 15069 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15079 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8 first
+.no_stack_arguments
+ 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */
+ 15081 "00000001" // /* MW 5 */
+ 15082 "00000000" // /* MW 4 */
+ 15083 "11111000" // /* MW 3 */
+ 15084 "00010101" // /* MW 2 */
+ 15085 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15087 "10000000" // /* MW 5 */
+ 15088 "11001010" // /* MW 4 */
+ 15089 "11000110" // /* MW 3 */
+ 15090 "00000111" // /* MW 2 */
+ 15091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15095 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15097 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15099 "10000001" // /* MW 5 */
+ 15100 "11000001" // /* MW 4 */
+ 15101 "11110100" // /* MW 3 */
+ 15102 "00101100" // /* MW 2 */
+ 15103 "00000000" // /* MW 1 */
+.return_address
+ 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */
+ 15105 "00000000" // /* MW 5 */
+ 15106 "00000000" // /* MW 4 */
+ 15107 "10011000" // /* MW 3 */
+ 15108 "00011101" // /* MW 2 */
+ 15109 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15111 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15119 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.src_ref 7 "superkernels.cpp" 637 8 first
+.no_stack_arguments
+ 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 15121 "00000001" // /* MW 5 */
+ 15122 "00000000" // /* MW 4 */
+ 15123 "01011000" // /* MW 3 */
+ 15124 "00010110" // /* MW 2 */
+ 15125 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15127 "00000000" // /* MW 5 */
+ 15128 "11001011" // /* MW 4 */
+ 15129 "11000110" // /* MW 3 */
+ 15130 "00000111" // /* MW 2 */
+ 15131 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15133 "11000000" // /* MW 3 */
+ 15134 "01100000" // /* MW 2 */
+ 15135 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 15141 "10000001" // /* MW 11 */
+ 15142 "10101101" // /* MW 10 */
+ 15143 "00000000" // /* MW 9 */
+ 15144 "00000000" // /* MW 8 */
+ 15145 "00000000" // /* MW 7 */
+ 15146 "00000000" // /* MW 6 */
+ 15147 "00100000" // /* MW 5 */
+ 15148 "00000000" // /* MW 4 */
+ 15149 "11110000" // /* MW 3 */
+ 15150 "00101100" // /* MW 2 */
+ 15151 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.return_address
+ 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15153 "10011001" // /* MW 3 */
+ 15154 "11110000" // /* MW 2 */
+ 15155 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15157 "00001010" // /* MW 5 */
+ 15158 "01000100" // /* MW 4 */
+ 15159 "00100000" // /* MW 3 */
+ 15160 "10000011" // /* MW 2 */
+ 15161 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 1 "io_buffer_main.h" 324 32 first
+ 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15163 "00010000" // /* MW 9 */
+ 15164 "00100100" // /* MW 8 */
+ 15165 "10110010" // /* MW 7 */
+ 15166 "11110011" // /* MW 6 */
+ 15167 "00000001" // /* MW 5 */
+ 15168 "00000000" // /* MW 4 */
+ 15169 "11010000" // /* MW 3 */
+ 15170 "11000010" // /* MW 2 */
+ 15171 "11101000" // /* MW 1 */
+ 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15173 "00000000" // /* MW 1 */
+ 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15175 "00000000" // /* MW 1 */
+ 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15177 "00000000" // /* MW 1 */
+ 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15179 "00000000" // /* MW 1 */
+ 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15181 "00000000" // /* MW 1 */
+ 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15183 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15185 "00011000" // /* MW 3 */
+ 15186 "00010001" // /* MW 2 */
+ 15187 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15189 "01010110" // /* MW 3 */
+ 15190 "11110110" // /* MW 2 */
+ 15191 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15193 "00010110" // /* MW 3 */
+ 15194 "01010110" // /* MW 2 */
+ 15195 "00000000" // /* MW 1 */
+ 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15197 "00000000" // /* MW 1 */
+ 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15199 "00000000" // /* MW 1 */
+ 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15201 "00000000" // /* MW 1 */
+ 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15203 "00000000" // /* MW 1 */
+ 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15207 "00100001" // /* MW 3 */
+ 15208 "01100101" // /* MW 2 */
+ 15209 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15211 "01010001" // /* MW 3 */
+ 15212 "11110110" // /* MW 2 */
+ 15213 "00001001" // /* MW 1 */
+ 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15215 "00000000" // /* MW 1 */
+ 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15217 "00000000" // /* MW 1 */
+ 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15219 "00000000" // /* MW 1 */
+ 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15221 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15223 "00011000" // /* MW 3 */
+ 15224 "00010001" // /* MW 2 */
+ 15225 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15227 "01010110" // /* MW 3 */
+ 15228 "11100110" // /* MW 2 */
+ 15229 "00000110" // /* MW 1 */
+ 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15231 "00000000" // /* MW 1 */
+ 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15233 "00000000" // /* MW 1 */
+ 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */
+ 15235 "00000000" // /* MW 5 */
+ 15236 "00000000" // /* MW 4 */
+ 15237 "11011000" // /* MW 3 */
+ 15238 "00011101" // /* MW 2 */
+ 15239 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15241 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15243 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 7 "superkernels.cpp" 649 14
+.delay_slot
+ 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15245 "00000001" // /* MW 3 */
+ 15246 "00100000" // /* MW 2 */
+ 15247 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15249 "01000011" // /* MW 5 */
+ 15250 "11000110" // /* MW 4 */
+ 15251 "00111000" // /* MW 3 */
+ 15252 "11000010" // /* MW 2 */
+ 15253 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28 first
+.delay_slot
+ 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15255 "00000000" // /* MW 9 */
+ 15256 "00000000" // /* MW 8 */
+ 15257 "00000000" // /* MW 7 */
+ 15258 "10000000" // /* MW 6 */
+ 15259 "00110001" // /* MW 5 */
+ 15260 "11100110" // /* MW 4 */
+ 15261 "11110110" // /* MW 3 */
+ 15262 "00101100" // /* MW 2 */
+ 15263 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+.src_ref 7 "superkernels.cpp" 649 14
+ 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 15265 "00000000" // /* MW 15 */
+ 15266 "00000000" // /* MW 14 */
+ 15267 "01111000" // /* MW 13 */
+ 15268 "10100101" // /* MW 12 */
+ 15269 "00000001" // /* MW 11 */
+ 15270 "00001000" // /* MW 10 */
+ 15271 "00000000" // /* MW 9 */
+ 15272 "00000001" // /* MW 8 */
+ 15273 "01011011" // /* MW 7 */
+ 15274 "00000001" // /* MW 6 */
+ 15275 "00100000" // /* MW 5 */
+ 15276 "00000000" // /* MW 4 */
+ 15277 "11110000" // /* MW 3 */
+ 15278 "00101100" // /* MW 2 */
+ 15279 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+.src_ref 7 "superkernels.cpp" 648 19
+.src_ref 7 "superkernels.cpp" 651
+ 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15281 "00010000" // /* MW 9 */
+ 15282 "00110000" // /* MW 8 */
+ 15283 "10110010" // /* MW 7 */
+ 15284 "11110011" // /* MW 6 */
+ 15285 "00000001" // /* MW 5 */
+ 15286 "00000000" // /* MW 4 */
+ 15287 "00100000" // /* MW 3 */
+ 15288 "10000111" // /* MW 2 */
+ 15289 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+.src_ref 7 "superkernels.cpp" 648 19 first
+.src_ref 7 "superkernels.cpp" 649 14
+ 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15291 "00010000" // /* MW 9 */
+ 15292 "00100000" // /* MW 8 */
+ 15293 "00110010" // /* MW 7 */
+ 15294 "11110011" // /* MW 6 */
+ 15295 "00000001" // /* MW 5 */
+ 15296 "00000000" // /* MW 4 */
+ 15297 "11010000" // /* MW 3 */
+ 15298 "11001010" // /* MW 2 */
+ 15299 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15301 "00110110" // /* MW 3 */
+ 15302 "00000110" // /* MW 2 */
+ 15303 "00000110" // /* MW 1 */
+ 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15305 "00000000" // /* MW 1 */
+ 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15307 "00000000" // /* MW 1 */
+ 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15309 "00000000" // /* MW 1 */
+ 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15311 "00000000" // /* MW 1 */
+ 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15313 "00000000" // /* MW 1 */
+ 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15315 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 16
+ 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15317 "00101000" // /* MW 3 */
+ 15318 "01100011" // /* MW 2 */
+ 15319 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */
+ 15321 "00000001" // /* MW 5 */
+ 15322 "01000000" // /* MW 4 */
+ 15323 "11111000" // /* MW 3 */
+ 15324 "00011101" // /* MW 2 */
+ 15325 "10001000" // /* MW 1 */
+.delay_slot
+ 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15327 "10011001" // /* MW 3 */
+ 15328 "11111011" // /* MW 2 */
+ 15329 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15331 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15333 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15335 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15337 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 649 14 first
+ 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15339 "00100011" // /* MW 5 */
+ 15340 "00001100" // /* MW 4 */
+ 15341 "11111100" // /* MW 3 */
+ 15342 "00101100" // /* MW 2 */
+ 15343 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15345 "00011001" // /* MW 3 */
+ 15346 "11111111" // /* MW 2 */
+ 15347 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651 first
+ 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15349 "00000000" // /* MW 3 */
+ 15350 "00101000" // /* MW 2 */
+ 15351 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651
+.delay_slot
+ 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15353 "00000001" // /* MW 5 */
+ 15354 "00000000" // /* MW 4 */
+ 15355 "00000000" // /* MW 3 */
+ 15356 "11111000" // /* MW 2 */
+ 15357 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+ 15365 "00000000" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 21 first
+.src_ref 0 "0_0_reloadable5.cc" 23 79
+.function_start
+ 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15377 "11000000" // /* MW 3 */
+ 15378 "01100000" // /* MW 2 */
+ 15379 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 23 79 first
+ 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15381 "00011110" // /* MW 3 */
+ 15382 "00011100" // /* MW 2 */
+ 15383 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 24 79 first
+ 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15385 "10011110" // /* MW 3 */
+ 15386 "00101100" // /* MW 2 */
+ 15387 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 26 81 first
+ 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15389 "10011110" // /* MW 3 */
+ 15390 "11110101" // /* MW 2 */
+ 15391 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 25 47 first
+ 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15393 "00011110" // /* MW 3 */
+ 15394 "00000101" // /* MW 2 */
+ 15395 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 22 4 first
+.tail_call
+ 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 15397 "00000000" // /* MW 5 */
+ 15398 "00000000" // /* MW 4 */
+ 15399 "01110000" // /* MW 3 */
+ 15400 "00001101" // /* MW 2 */
+ 15401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 15411 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 30 first
+.src_ref 0 "0_0_reloadable5.cc" 32 79
+.function_start
+ 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15425 "11000000" // /* MW 3 */
+ 15426 "01100000" // /* MW 2 */
+ 15427 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 32 79 first
+ 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15429 "00011110" // /* MW 3 */
+ 15430 "00101100" // /* MW 2 */
+ 15431 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 34 81 first
+ 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15433 "00011110" // /* MW 3 */
+ 15434 "11110101" // /* MW 2 */
+ 15435 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 33 47 first
+ 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15437 "10011110" // /* MW 3 */
+ 15438 "00000100" // /* MW 2 */
+ 15439 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 31 4 first
+.tail_call
+ 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 15441 "00000000" // /* MW 5 */
+ 15442 "00000000" // /* MW 4 */
+ 15443 "00011000" // /* MW 3 */
+ 15444 "00010000" // /* MW 2 */
+ 15445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 15455 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 38 first
+.src_ref 0 "0_0_reloadable5.cc" 40 79
+.function_start
+ 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15457 "11000000" // /* MW 3 */
+ 15458 "01100000" // /* MW 2 */
+ 15459 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 40 79 first
+ 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15461 "00011110" // /* MW 3 */
+ 15462 "00101100" // /* MW 2 */
+ 15463 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 42 81 first
+ 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15465 "00011110" // /* MW 3 */
+ 15466 "11110101" // /* MW 2 */
+ 15467 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 41 47 first
+ 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15469 "10011110" // /* MW 3 */
+ 15470 "00000100" // /* MW 2 */
+ 15471 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 39 4 first
+.tail_call
+ 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 15473 "00000000" // /* MW 5 */
+ 15474 "00000000" // /* MW 4 */
+ 15475 "11001000" // /* MW 3 */
+ 15476 "00010001" // /* MW 2 */
+ 15477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 15487 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 46 first
+.src_ref 0 "0_0_reloadable5.cc" 48 79
+.function_start
+ 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15489 "11000000" // /* MW 3 */
+ 15490 "01100000" // /* MW 2 */
+ 15491 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 48 79 first
+ 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15493 "00011110" // /* MW 3 */
+ 15494 "00101100" // /* MW 2 */
+ 15495 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 50 81 first
+ 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15497 "00011110" // /* MW 3 */
+ 15498 "11110101" // /* MW 2 */
+ 15499 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 49 47 first
+ 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15501 "10011110" // /* MW 3 */
+ 15502 "00000100" // /* MW 2 */
+ 15503 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 47 4 first
+.tail_call
+ 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 15505 "00000000" // /* MW 5 */
+ 15506 "00000000" // /* MW 4 */
+ 15507 "10001000" // /* MW 3 */
+ 15508 "00010100" // /* MW 2 */
+ 15509 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 15519 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 54 first
+.src_ref 0 "0_0_reloadable5.cc" 56 79
+.function_start
+ 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15521 "11000000" // /* MW 3 */
+ 15522 "01100000" // /* MW 2 */
+ 15523 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 56 79 first
+ 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15525 "00011110" // /* MW 3 */
+ 15526 "00111100" // /* MW 2 */
+ 15527 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 57 47 first
+ 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15529 "10011110" // /* MW 3 */
+ 15530 "11101100" // /* MW 2 */
+ 15531 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 59 81 first
+ 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15533 "10011110" // /* MW 3 */
+ 15534 "00010101" // /* MW 2 */
+ 15535 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 58 80 first
+ 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15537 "00011110" // /* MW 3 */
+ 15538 "00000101" // /* MW 2 */
+ 15539 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 55 4 first
+.tail_call
+ 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */
+ 15541 "00000000" // /* MW 5 */
+ 15542 "00000000" // /* MW 4 */
+ 15543 "11110000" // /* MW 3 */
+ 15544 "00010110" // /* MW 2 */
+ 15545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15551 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15553 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 15555 "00000000" // /* MW 1 */
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function _b924_wrapper _Z13_b924_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 63 first
+.src_ref 0 "0_0_reloadable5.cc" 65 79
+.function_start
+ 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15569 "11000000" // /* MW 3 */
+ 15570 "01100000" // /* MW 2 */
+ 15571 "00011011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 65 79 first
+ 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15573 "00011110" // /* MW 3 */
+ 15574 "00011100" // /* MW 2 */
+ 15575 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 66 79 first
+ 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15577 "10011110" // /* MW 3 */
+ 15578 "00011100" // /* MW 2 */
+ 15579 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 67 80 first
+ 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15581 "00011110" // /* MW 3 */
+ 15582 "00101101" // /* MW 2 */
+ 15583 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 69 81 first
+ 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15585 "00011110" // /* MW 3 */
+ 15586 "11110110" // /* MW 2 */
+ 15587 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 68 47 first
+ 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15589 "10011110" // /* MW 3 */
+ 15590 "00000101" // /* MW 2 */
+ 15591 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 64 4 first
+.tail_call
+ 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */
+ 15593 "00000000" // /* MW 5 */
+ 15594 "00000000" // /* MW 4 */
+ 15595 "11010000" // /* MW 3 */
+ 15596 "00011011" // /* MW 2 */
+ 15597 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+ 15607 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 73 first
+.src_ref 0 "0_0_reloadable5.cc" 75 79
+.function_start
+ 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15617 "11000000" // /* MW 3 */
+ 15618 "01100000" // /* MW 2 */
+ 15619 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 75 79 first
+ 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15621 "00011110" // /* MW 3 */
+ 15622 "00011100" // /* MW 2 */
+ 15623 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 76 79 first
+ 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15625 "10011110" // /* MW 3 */
+ 15626 "00101100" // /* MW 2 */
+ 15627 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 78 81 first
+ 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15629 "10011110" // /* MW 3 */
+ 15630 "11110101" // /* MW 2 */
+ 15631 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 77 47 first
+ 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15633 "00011110" // /* MW 3 */
+ 15634 "00000101" // /* MW 2 */
+ 15635 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 74 4 first
+.tail_call
+ 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */
+ 15637 "00000000" // /* MW 5 */
+ 15638 "00000000" // /* MW 4 */
+ 15639 "11100000" // /* MW 3 */
+ 15640 "00011010" // /* MW 2 */
+ 15641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 15651 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15665 "01000001" // /* MW 5 */
+ 15666 "10100000" // /* MW 4 */
+ 15667 "00101111" // /* MW 3 */
+ 15668 "11000000" // /* MW 2 */
+ 15669 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15671 "00011100" // /* MW 3 */
+ 15672 "11000110" // /* MW 2 */
+ 15673 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15675 "00011100" // /* MW 3 */
+ 15676 "11000110" // /* MW 2 */
+ 15677 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15679 "00011100" // /* MW 3 */
+ 15680 "11000110" // /* MW 2 */
+ 15681 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15683 "00011100" // /* MW 3 */
+ 15684 "11000110" // /* MW 2 */
+ 15685 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15687 "00011100" // /* MW 3 */
+ 15688 "11000110" // /* MW 2 */
+ 15689 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15691 "00011100" // /* MW 3 */
+ 15692 "11000110" // /* MW 2 */
+ 15693 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15695 "00011100" // /* MW 3 */
+ 15696 "11000110" // /* MW 2 */
+ 15697 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15699 "00011100" // /* MW 3 */
+ 15700 "11000110" // /* MW 2 */
+ 15701 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15703 "00011100" // /* MW 3 */
+ 15704 "11000110" // /* MW 2 */
+ 15705 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15707 "00011100" // /* MW 3 */
+ 15708 "11000110" // /* MW 2 */
+ 15709 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15711 "00011100" // /* MW 3 */
+ 15712 "11000110" // /* MW 2 */
+ 15713 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15715 "00011100" // /* MW 3 */
+ 15716 "11000110" // /* MW 2 */
+ 15717 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15719 "00011100" // /* MW 3 */
+ 15720 "11000110" // /* MW 2 */
+ 15721 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15723 "00011100" // /* MW 3 */
+ 15724 "11000110" // /* MW 2 */
+ 15725 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15727 "00011100" // /* MW 3 */
+ 15728 "11000110" // /* MW 2 */
+ 15729 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15731 "00011100" // /* MW 3 */
+ 15732 "11000110" // /* MW 2 */
+ 15733 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15735 "00011100" // /* MW 3 */
+ 15736 "11000110" // /* MW 2 */
+ 15737 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15739 "00011100" // /* MW 3 */
+ 15740 "11000110" // /* MW 2 */
+ 15741 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15743 "00011100" // /* MW 3 */
+ 15744 "11000110" // /* MW 2 */
+ 15745 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15747 "00011100" // /* MW 3 */
+ 15748 "11000110" // /* MW 2 */
+ 15749 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15751 "00011100" // /* MW 3 */
+ 15752 "11000110" // /* MW 2 */
+ 15753 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15755 "00011100" // /* MW 3 */
+ 15756 "11000110" // /* MW 2 */
+ 15757 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15759 "00011100" // /* MW 3 */
+ 15760 "11000110" // /* MW 2 */
+ 15761 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15763 "00011100" // /* MW 3 */
+ 15764 "11000110" // /* MW 2 */
+ 15765 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15767 "00011100" // /* MW 3 */
+ 15768 "11000110" // /* MW 2 */
+ 15769 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15771 "00011100" // /* MW 3 */
+ 15772 "11000110" // /* MW 2 */
+ 15773 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15775 "00011100" // /* MW 3 */
+ 15776 "11000110" // /* MW 2 */
+ 15777 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15779 "00011100" // /* MW 3 */
+ 15780 "11000110" // /* MW 2 */
+ 15781 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15783 "00000000" // /* MW 3 */
+ 15784 "00101000" // /* MW 2 */
+ 15785 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15787 "00011100" // /* MW 3 */
+ 15788 "11000110" // /* MW 2 */
+ 15789 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15791 "00011100" // /* MW 3 */
+ 15792 "11000110" // /* MW 2 */
+ 15793 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15795 "00011100" // /* MW 3 */
+ 15796 "11000110" // /* MW 2 */
+ 15797 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15799 "00011100" // /* MW 3 */
+ 15800 "11000110" // /* MW 2 */
+ 15801 "00010000" // /* MW 1 */
+.delay_slot
+ 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15803 "10100000" // /* MW 3 */
+ 15804 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 15805 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmico
new file mode 100644
index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.cmico
@@ -0,0 +1 @@
++Mdec
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.lst
new file mode 100644
index 0000000000000000000000000000000000000000..4a0bb9c3b02d8c2df3b5faeb6f4b950508fce7fd
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.lst
@@ -0,0 +1,5518 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+
+.text_segment PM 2352
+.entry_point
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function_start
+ 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1
+ 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15
+ 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2
+ 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15
+ 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16
+ 2390 0x00 0x00 NOPX
+ 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4
+ 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4
+ 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12
+ 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8
+ 2408 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2412 0x00 0x00 NOPX
+ 2414 0x00 0x00 NOPX
+ 2416 0x00 0x00 NOPX
+ 2418 0x00 0x00 NOPX
+ 2420 0x00 0x00 NOPX
+ 2422 0x00 0x00 NOPX
+ 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27
+ 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12]
+ 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 2436 0x00 0x00 NOPX
+ 2438 0x00 0x00 NOPX
+ 2440 0x00 0x00 NOPX
+ 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26
+ 2446 0x10 0x24 0x09 0x18 MOVX r18, #2
+ 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 0x10 0x26 0x05 0x18 MOVX r19, #1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15
+ 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4
+ 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4
+ 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12
+ 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8
+ 2500 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2504 0x00 0x00 NOPX
+ 2506 0x00 0x00 NOPX
+ 2508 0x00 0x00 NOPX
+ 2510 0x00 0x00 NOPX
+ 2512 0x00 0x00 NOPX
+ 2514 0x00 0x00 NOPX
+ 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27
+ 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12]
+ 2524 0x00 0x00 NOPX
+ 2526 0x00 0x00 NOPX
+ 2528 0x00 0x00 NOPX
+ 2530 0x00 0x00 NOPX
+ 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26
+ 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18
+ 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17
+ 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056
+ 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24]
+ 2556 0x00 0x00 NOPX
+ 2558 0x00 0x00 NOPX
+ 2560 0x00 0x00 NOPX
+ 2562 0x00 0x00 NOPX
+ 2564 0x00 0x00 NOPX
+ 2566 0x00 0x00 NOPX
+.no_stack_arguments
+ 2568 0x10 0x30 0x40 0x18 JL p1
+.delay_slot
+ 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 2576 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2578 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2580 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.return_address
+ 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1
+ 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24]
+ 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20]
+ 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16]
+ 2610 0x00 0x00 NOPX
+ 2612 0x00 0x00 NOPX
+ 2614 0x00 0x00 NOPX
+ 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16
+ 2620 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 2624 0x00 0x00 NOPX
+ 2626 0x00 0x00 NOPX
+ 2628 0x00 0x00 NOPX
+ 2630 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15
+ 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 2652 0x00 0x00 NOPX
+ 2654 0x00 0x00 NOPX
+ 2656 0x00 0x00 NOPX
+ 2658 0x00 0x00 NOPX
+ 2660 0x00 0x00 NOPX
+ 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17
+ 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16
+ 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4]
+ 2678 0x00 0x00 NOPX
+ 2680 0x00 0x00 NOPX
+ 2682 0x00 0x00 NOPX
+ 2684 0x00 0x00 NOPX
+ 2686 0x00 0x00 NOPX
+ 2688 0x00 0x00 NOPX
+ 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4]
+ 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+ 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8]
+ 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 2716 0x00 0x00 NOPX
+ 2718 0x00 0x00 NOPX
+ 2720 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0
+.delay_slot
+ 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+.delay_slot
+ 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.delay_slot
+ 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4]
+.delay_slot
+.swstall delay_slot
+ 2740 0x00 0x00 NOPX
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+
+.text_segment PM 2752
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function_start
+ 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1
+ 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9
+ 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64
+ 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16
+ 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15
+ 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0
+ 2812 0x00 0x00 NOPX
+ 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2830 0x00 0x00 NOPX
+ 2832 0x00 0x00 NOPX
+ 2834 0x00 0x00 NOPX
+ 2836 0x00 0x00 NOPX
+ 2838 0x00 0x00 NOPX
+ 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2856 0x00 0x00 NOPX
+ 2858 0x00 0x00 NOPX
+ 2860 0x00 0x00 NOPX
+ 2862 0x00 0x00 NOPX
+ 2864 0x00 0x00 NOPX
+ 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0]
+ 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4]
+ 2882 0x00 0x00 NOPX
+ 2884 0x00 0x00 NOPX
+ 2886 0x00 0x00 NOPX
+ 2888 0x00 0x00 NOPX
+ 2890 0x00 0x00 NOPX
+ 2892 0x09 0x04 0x09 0x98 ST eh0, [p1]
+ 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4]
+ 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3
+ 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1
+ 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5
+ 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2
+ 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0
+ 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6
+ 2924 0x00 0x00 NOPX
+ 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1
+ 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1
+ 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5
+ 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4
+ 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27
+ 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27
+ 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6
+ 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16
+ 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27
+ 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27
+ 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056
+.delay_slot
+ 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4
+.delay_slot
+ 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20
+.delay_slot
+ 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5
+.delay_slot
+ 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4
+.delay_slot
+ 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27
+ 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056
+.delay_slot
+.swstall delay_slot
+ 3006 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3008 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3012 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3014 0x00 0x00 NOPX
+ 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104
+.delay_slot
+ 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12
+.delay_slot
+ 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16
+.delay_slot
+ 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3
+.delay_slot
+ 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15
+.delay_slot
+ 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4
+ 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12
+ 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1
+ 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+ 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60
+ 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51
+ 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55
+ 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68
+ 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112
+ 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105
+ 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49
+ 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63
+ 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1
+ 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7
+ 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18
+ 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1
+ 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87
+ 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1
+ 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18
+ 3310 0x00 0x00 NOPX
+ 3312 0x00 0x00 NOPX
+ 3314 0x00 0x00 NOPX
+ 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18
+ 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6
+ 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5
+ 3328 0x00 0x00 NOPX
+ 3330 0x00 0x00 NOPX
+ 3332 0x00 0x00 NOPX
+ 3334 0x00 0x00 NOPX
+ 3336 0x00 0x00 NOPX
+ 3338 0x00 0x00 NOPX
+ 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14
+ 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27
+ 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2
+ 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536
+ 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18
+ 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680
+ 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16
+ 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8
+ 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17
+ 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19
+ 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26
+ 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22
+ 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30
+ 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25
+ 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568
+.delay_slot
+ 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30
+.delay_slot
+ 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21
+.delay_slot
+ 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17
+.delay_slot
+ 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1
+.delay_slot
+ 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27
+ 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32]
+ 3478 0x00 0x00 NOPX
+ 3480 0x00 0x00 NOPX
+ 3482 0x00 0x00 NOPX
+ 3484 0x00 0x00 NOPX
+ 3486 0x00 0x00 NOPX
+ 3488 0x00 0x00 NOPX
+ 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568
+.delay_slot
+.swstall delay_slot
+ 3496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3500 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3502 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3504 0x00 0x00 NOPX
+ 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64
+ 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27
+ 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6
+ 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0
+ 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27
+ 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30
+ 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28
+ 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27
+ 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64
+ 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0
+ 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18
+ 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6
+ 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31
+ 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18
+ 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144
+ 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27
+ 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288
+ 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26
+ 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27
+ 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0
+ 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24
+ 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28
+ 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256
+ 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27
+ 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542
+ 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1
+ 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1
+ 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0
+ 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20
+ 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1
+ 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0
+ 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0
+ 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0
+ 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14
+ 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26
+ 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56
+ 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7
+ 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3
+ 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18
+ 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4
+ 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6
+ 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3
+ 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0
+ 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0
+ 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1
+ 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3
+ 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1
+ 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26
+ 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr
+.delay_slot
+ 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20
+.delay_slot
+ 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4
+.delay_slot
+ 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4
+.delay_slot
+ 4168 0x0a 0x04 0x51 0x98 ST r2, [p2]
+.delay_slot
+ 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+
+.text_segment PM 4192
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function_start
+ 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2
+ 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4
+ 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032
+ 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336
+ 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384
+ 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0
+ 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 4258 0x00 0x00 NOPX
+ 4260 0x00 0x00 NOPX
+ 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1
+ 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4400 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0
+ 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+ 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0
+ 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr
+.delay_slot
+ 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26]
+.delay_slot
+ 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+.delay_slot
+.swstall delay_slot
+ 4456 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4458 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4460 0x00 0x00 NOPX
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+
+.text_segment PM 4464
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function_start
+ 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128
+ 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3
+ 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28
+ 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216
+ 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431
+ 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20
+ 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60
+ 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64
+ 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28]
+ 4562 0x00 0x00 NOPX
+ 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18
+ 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28
+ 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17
+ 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6
+ 4584 0x00 0x00 NOPX
+ 4586 0x00 0x00 NOPX
+ 4588 0x00 0x00 NOPX
+ 4590 0x00 0x00 NOPX
+ 4592 0x00 0x00 NOPX
+ 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30
+ 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27
+ 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28
+ 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30
+ 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17
+ 4650 0x00 0x00 NOPX
+ 4652 0x00 0x00 NOPX
+ 4654 0x00 0x00 NOPX
+ 4656 0x00 0x00 NOPX
+ 4658 0x00 0x00 NOPX
+ 4660 0x00 0x00 NOPX
+ 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4
+ 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736
+.delay_slot
+ 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20
+.delay_slot
+ 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31
+.delay_slot
+ 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22
+.delay_slot
+ 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23
+.delay_slot
+ 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22
+ 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736
+.delay_slot
+ 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24]
+.delay_slot
+.swstall delay_slot
+ 4704 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4706 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4708 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4710 0x00 0x00 NOPX
+ 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784
+.delay_slot
+ 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25
+.delay_slot
+ 4726 0x10 0x26 0x05 0x18 MOVX r19, #1
+.delay_slot
+.swstall delay_slot
+ 4730 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4732 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4734 0x00 0x00 NOPX
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25
+ 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21
+ 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27
+ 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30
+ 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16
+ 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27
+ 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20
+ 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20
+ 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+ 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832
+.delay_slot
+ 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4
+.delay_slot
+.swstall delay_slot
+ 4800 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4802 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4804 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4806 0x00 0x00 NOPX
+ 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215
+ 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196
+ 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4
+ 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4
+ 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1]
+ 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18
+ 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4]
+ 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18
+ 4902 0x00 0x00 NOPX
+ 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19
+ 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17
+ 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024
+.delay_slot
+ 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22
+.delay_slot
+ 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22
+.delay_slot
+ 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20
+.delay_slot
+ 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0
+.delay_slot
+ 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30
+ 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032
+ 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1
+ 4958 0x00 0x00 NOPX
+ 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144
+ 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20
+ 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0
+ 4998 0x00 0x00 NOPX
+ 5000 0x00 0x00 NOPX
+ 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7]
+ 5006 0x00 0x00 NOPX
+ 5008 0x00 0x00 NOPX
+ 5010 0x00 0x00 NOPX
+ 5012 0x00 0x00 NOPX
+ 5014 0x00 0x00 NOPX
+ 5016 0x00 0x00 NOPX
+ 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48]
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3]
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18
+.delay_slot
+ 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30
+.delay_slot
+ 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17
+.delay_slot
+ 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781
+.delay_slot
+ 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV
+.return_address
+ 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44]
+.no_stack_arguments
+ 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.swstall delay_slot
+ 5114 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5116 0x00 0x00 NOPX
+.delay_slot
+ 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32
+.delay_slot
+ 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13
+.delay_slot
+ 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX
+.return_address
+ 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116
+ 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12
+ 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4
+ 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4
+ 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4
+ 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4
+ 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4
+ 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4
+ 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4
+ 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4
+ 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4
+ 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4
+ 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4
+ 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4
+ 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4
+ 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4
+ 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4
+ 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4
+ 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4
+ 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4
+ 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4
+ 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6]
+ 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48]
+ 5236 0x04 0x04 0x56 0x98 LDA r2, [p4]
+ 5240 0x00 0x00 NOPX
+ 5242 0x00 0x00 NOPX
+ 5244 0x00 0x00 NOPX
+ 5246 0x00 0x00 NOPX
+ 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18
+ 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176
+.delay_slot
+ 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.delay_slot
+ 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2]
+.delay_slot
+ 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20
+.delay_slot
+.swstall delay_slot
+ 5272 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5274 0x00 0x00 NOPX
+ 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132
+ 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60
+ 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19
+ 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780
+ 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0
+ 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4
+ 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5
+ 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8
+ 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4
+ 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4
+ 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28
+ 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4
+ 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0
+ 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4]
+ 5400 0x04 0x04 0x96 0x98 LDA r4, [p4]
+ 5404 0x19 0xda 0x00 0xf8 MOV r7, m5
+ 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6
+ 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5
+ 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6
+ 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64
+ 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64
+ 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.loop_nesting 1
+ 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14
+ 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12
+ 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6
+ 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2
+ 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0
+ 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760
+ 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712
+ 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24]
+ 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+ 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.loop_nesting 1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5
+ 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20
+ 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20
+ 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20
+ 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5
+ 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1
+ 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29
+ 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30
+ 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0
+ 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3
+ 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096
+.delay_slot
+ 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1
+.delay_slot
+ 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7
+.delay_slot
+ 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5
+.delay_slot
+ 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2
+.delay_slot
+ 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5
+ 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9
+ 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9
+ 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128
+.delay_slot
+ 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9
+.delay_slot
+ 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9
+.delay_slot
+ 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9
+.delay_slot
+ 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+ 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7]
+ 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64]
+ 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1]
+ 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64]
+ 6112 0x08 0x06 0x13 0x18 VST x8, [p0]
+ 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64]
+ 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+ 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0
+.delay_slot
+ 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2
+.delay_slot
+ 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19
+.delay_slot
+ 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7
+.delay_slot
+ 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7
+.delay_slot
+.swstall delay_slot
+ 6158 0x00 0x00 NOPX
+.loop_nesting 0
+ 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832
+.delay_slot
+.swstall delay_slot
+ 6166 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6168 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6170 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6172 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6174 0x00 0x00 NOPX
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+ 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14
+ 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2
+ 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23
+ 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30
+ 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29
+ 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21
+ 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320
+ 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14
+ 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4
+ 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4
+ 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28
+ 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4
+ 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6
+ 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4]
+ 6284 0x03 0x07 0x96 0x98 LDA r28, [p3]
+ 6288 0x00 0x00 NOPX
+ 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4
+ 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3
+ 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6
+ 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64
+ 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64
+ 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.loop_nesting 1
+ 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496
+ 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544
+ 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3
+ 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30
+ 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25]
+ 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0]
+ 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25]
+ 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]
+ 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25]
+ 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25]
+ 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25]
+ 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22
+ 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23
+ 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9
+ 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9
+ 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV
+.loop_nesting 1
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19
+ 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22
+ 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23
+ 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+ 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+ 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19
+ 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19
+ 6646 0x00 0x00 NOPX
+ 6648 0x00 0x00 NOPX
+ 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0
+ 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1
+ 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768
+.delay_slot
+ 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0
+.delay_slot
+ 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3
+.delay_slot
+ 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3
+.delay_slot
+ 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2
+.delay_slot
+ 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2
+ 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9
+ 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9
+ 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800
+.delay_slot
+ 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9
+.delay_slot
+ 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9
+.delay_slot
+ 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9
+.delay_slot
+ 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+ 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3]
+ 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64]
+ 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5]
+ 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64]
+ 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3]
+ 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64]
+ 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+ 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2
+.delay_slot
+ 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4
+.delay_slot
+.swstall delay_slot
+ 6810 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6812 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6814 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20]
+ 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16]
+ 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12]
+ 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24]
+ 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+ 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4]
+ 6856 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 6866 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6868 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6870 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6872 0x00 0x00 NOPX
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+
+.text_segment PM 6880
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992
+ 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15
+ 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16]
+ 6910 0x00 0x00 NOPX
+ 6912 0x00 0x00 NOPX
+ 6914 0x00 0x00 NOPX
+ 6916 0x00 0x00 NOPX
+ 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088
+.delay_slot
+ 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4]
+.delay_slot
+ 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20]
+.delay_slot
+ 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 6940 0x00 0x07 0xc7 0xac 0x00 0x44 MOVXM r15, #509440
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 6984 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0
+.delay_slot
+ 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2
+.return_address
+ 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11
+ 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996
+ 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024
+ 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 7036 0x00 0x00 NOPX
+ 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2]
+ 7042 0x00 0x00 NOPX
+ 7044 0x00 0x00 NOPX
+ 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6
+ 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16
+ 7058 0x00 0x00 NOPX
+ 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 7064 0x00 0x00 NOPX
+ 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16
+ 7070 0x00 0x00 NOPX
+ 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000
+ 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004
+ 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992
+ 7118 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 7122 0x00 0x00 NOPX
+ 7124 0x00 0x00 NOPX
+ 7126 0x00 0x00 NOPX
+ 7128 0x00 0x00 NOPX
+ 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216
+.delay_slot
+ 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1
+.delay_slot
+ 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1
+.delay_slot
+ 7150 0x0e 0x06 0x71 0x98 ST r19, [p6]
+.delay_slot
+ 7154 0x0f 0x06 0x31 0x98 ST r17, [p7]
+ 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 7200 0x00 0x00 NOPX
+ 7202 0x00 0x00 NOPX
+ 7204 0x00 0x00 NOPX
+ 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.no_stack_arguments
+ 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464
+.delay_slot
+ 7222 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440
+.delay_slot
+.swstall delay_slot
+ 7228 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7232 0x00 0x00 NOPX
+.delay_slot
+ 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV
+.return_address
+ 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996
+ 7258 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16]
+ 7266 0x00 0x00 NOPX
+ 7268 0x00 0x00 NOPX
+ 7270 0x00 0x00 NOPX
+ 7272 0x00 0x00 NOPX
+ 7274 0x00 0x00 NOPX
+ 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360
+.delay_slot
+ 7286 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 7290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7292 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7294 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7296 0x00 0x00 NOPX
+ 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20
+ 7304 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7308 0x00 0x00 NOPX
+ 7310 0x00 0x00 NOPX
+ 7312 0x00 0x00 NOPX
+ 7314 0x00 0x00 NOPX
+ 7316 0x00 0x00 NOPX
+ 7318 0x00 0x00 NOPX
+ 7320 0x14 0x51 0x08 0x18 REL r17, r16
+ 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6]
+ 7330 0x00 0x00 NOPX
+ 7332 0x00 0x00 NOPX
+ 7334 0x00 0x00 NOPX
+ 7336 0x00 0x00 NOPX
+ 7338 0x00 0x00 NOPX
+ 7340 0x00 0x00 NOPX
+ 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+ 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+ 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024
+ 7370 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8]
+ 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 7382 0x00 0x00 NOPX
+ 7384 0x00 0x00 NOPX
+ 7386 0x00 0x00 NOPX
+ 7388 0x00 0x00 NOPX
+ 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424
+.delay_slot
+.swstall delay_slot
+ 7400 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7406 0x00 0x00 NOPX
+.delay_slot
+ 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0
+ 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4]
+ 7428 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7442 0x00 0x00 NOPX
+.delay_slot
+ 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 7456
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 7470 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 7478 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7488
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+ 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4]
+ 7506 0x00 0x00 NOPX
+ 7508 0x00 0x00 NOPX
+ 7510 0x00 0x00 NOPX
+ 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7520 0x00 0x00 NOPX
+ 7522 0x00 0x00 NOPX
+ 7524 0x00 0x00 NOPX
+ 7526 0x00 0x00 NOPX
+ 7528 0x00 0x00 NOPX
+ 7530 0x00 0x00 NOPX
+ 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 7540 0x00 0x00 NOPX
+ 7542 0x00 0x00 NOPX
+ 7544 0x00 0x00 NOPX
+ 7546 0x00 0x00 NOPX
+ 7548 0x00 0x00 NOPX
+ 7550 0x00 0x00 NOPX
+ 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 7560 0x00 0x00 NOPX
+ 7562 0x00 0x00 NOPX
+.no_stack_arguments
+ 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456
+.delay_slot
+.swstall delay_slot
+ 7570 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7572 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7574 0x00 0x00 NOPX
+.delay_slot
+ 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0
+.return_address
+ 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16
+ 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3
+ 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128
+ 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0]
+ 7618 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 0x10 0x22 0x05 0x18 MOVX r17, #1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27
+.delay_slot
+.swstall delay_slot
+ 7648 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7664
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0
+ 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12
+ 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0
+ 7684 0x00 0x00 NOPX
+ 7686 0x00 0x00 NOPX
+ 7688 0x00 0x00 NOPX
+ 7690 0x00 0x00 NOPX
+ 7692 0x00 0x00 NOPX
+ 7694 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0
+.delay_slot
+ 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1
+.delay_slot
+ 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2
+.delay_slot
+ 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1
+.delay_slot
+ 7716 0x08 0x04 0x51 0x98 ST r2, [p0]
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7728
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+.no_stack_arguments
+ 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488
+.delay_slot
+ 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 7752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7
+.delay_slot
+ 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7788 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7792
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20
+ 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3
+ 7804 0x00 0x00 NOPX
+ 7806 0x00 0x00 NOPX
+ 7808 0x00 0x00 NOPX
+ 7810 0x00 0x00 NOPX
+ 7812 0x00 0x00 NOPX
+ 7814 0x00 0x00 NOPX
+ 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872
+.delay_slot
+ 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6
+.delay_slot
+ 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2
+.delay_slot
+.swstall delay_slot
+ 7830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7834 0x00 0x00 NOPX
+ 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0]
+ 7840 0x00 0x00 NOPX
+ 7842 0x00 0x00 NOPX
+ 7844 0x00 0x00 NOPX
+ 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904
+.delay_slot
+.swstall delay_slot
+ 7852 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7854 0x00 0x00 NOPX
+.delay_slot
+ 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+.swstall delay_slot
+ 7860 0x00 0x00 NOPX
+.delay_slot
+ 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+ 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1]
+ 7876 0x00 0x00 NOPX
+ 7878 0x00 0x00 NOPX
+ 7880 0x00 0x00 NOPX
+ 7882 0x00 0x00 NOPX
+ 7884 0x00 0x00 NOPX
+ 7886 0x00 0x00 NOPX
+ 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+ 7892 0x00 0x00 NOPX
+ 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+ 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3
+ 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016
+ 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048
+ 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+ 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 7940 0x00 0x00 NOPX
+ 7942 0x00 0x00 NOPX
+ 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+ 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1
+ 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8064 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr
+.delay_slot
+.swstall delay_slot
+ 8092 0x00 0x00 NOPX
+.delay_slot
+ 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8098 0x00 0x00 NOPX
+.delay_slot
+ 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8104 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8112
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2
+ 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12
+ 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0
+ 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp
+ 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128
+ 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0]
+ 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0]
+ 8158 0x00 0x00 NOPX
+ 8160 0x00 0x00 NOPX
+.no_stack_arguments
+ 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792
+.delay_slot
+ 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0
+.delay_slot
+.swstall delay_slot
+ 8172 0x00 0x00 NOPX
+.delay_slot
+ 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27
+.delay_slot
+ 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18
+.delay_slot
+ 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16
+.return_address
+ 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 8196 0x00 0x00 NOPX
+ 8198 0x00 0x00 NOPX
+ 8200 0x00 0x00 NOPX
+ 8202 0x00 0x00 NOPX
+ 8204 0x00 0x00 NOPX
+ 8206 0x00 0x00 NOPX
+ 8208 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 8218 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8220 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8222 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8224 0x00 0x00 NOPX
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8240
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 8280 0x00 0x00 NOPX
+ 8282 0x00 0x00 NOPX
+ 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448
+.delay_slot
+ 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 8348 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120
+ 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120
+ 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 8404 0x00 0x00 NOPX
+ 8406 0x00 0x00 NOPX
+ 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464
+.delay_slot
+ 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 8420 0x00 0x00 NOPX
+.delay_slot
+ 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 8494 0x00 0x00 NOPX
+ 8496 0x00 0x00 NOPX
+ 8498 0x00 0x00 NOPX
+ 8500 0x00 0x00 NOPX
+ 8502 0x00 0x00 NOPX
+ 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 8508 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 8516 0x00 0x00 NOPX
+ 8518 0x00 0x00 NOPX
+ 8520 0x00 0x00 NOPX
+ 8522 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 8532 0x00 0x00 NOPX
+ 8534 0x00 0x00 NOPX
+ 8536 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 8546 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 8554 0x00 0x00 NOPX
+.no_stack_arguments
+ 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112
+.delay_slot
+.swstall delay_slot
+ 8562 0x00 0x00 NOPX
+.delay_slot
+ 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 8568 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 8602 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 8606 0x00 0x00 NOPX
+ 8608 0x00 0x00 NOPX
+ 8610 0x00 0x00 NOPX
+ 8612 0x00 0x00 NOPX
+ 8614 0x00 0x00 NOPX
+ 8616 0x14 0x51 0x08 0x18 REL r17, r16
+ 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 8630 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 8634 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 8638 0x00 0x00 NOPX
+ 8640 0x00 0x00 NOPX
+ 8642 0x00 0x00 NOPX
+ 8644 0x00 0x00 NOPX
+ 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688
+.delay_slot
+.swstall delay_slot
+ 8664 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8666 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8670 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8672 0x00 0x00 NOPX
+ 8674 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 8696 0x00 0x00 NOPX
+ 8698 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 8722 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8724 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8726 0x00 0x00 NOPX
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 8736
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function_start
+ 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8740 0x00 0x00 NOPX
+ 8742 0x00 0x00 NOPX
+ 8744 0x00 0x00 NOPX
+ 8746 0x00 0x00 NOPX
+ 8748 0x00 0x00 NOPX
+ 8750 0x00 0x00 NOPX
+ 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8760 0x00 0x00 NOPX
+ 8762 0x00 0x00 NOPX
+ 8764 0x00 0x00 NOPX
+ 8766 0x00 0x00 NOPX
+ 8768 0x00 0x00 NOPX
+ 8770 0x00 0x00 NOPX
+ 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24
+ 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1]
+ 8784 0x00 0x00 NOPX
+ 8786 0x00 0x00 NOPX
+ 8788 0x00 0x00 NOPX
+ 8790 0x00 0x00 NOPX
+ 8792 0x00 0x00 NOPX
+ 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24
+ 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+ 8802 0x00 0x00 NOPX
+ 8804 0x00 0x00 NOPX
+ 8806 0x00 0x00 NOPX
+ 8808 0x00 0x00 NOPX
+ 8810 0x00 0x00 NOPX
+ 8812 0x00 0x00 NOPX
+ 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4]
+ 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2]
+ 8822 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+ 8826 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8828 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8834 0x00 0x00 NOPX
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 8848
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function_start
+ 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976
+ 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024
+ 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2
+ 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32
+ 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+ 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1
+ 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0
+.delay_slot
+ 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.delay_slot
+ 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+.delay_slot
+ 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1
+.delay_slot
+ 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64
+.delay_slot
+ 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+
+.text_segment PM 9104
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 9144 0x00 0x00 NOPX
+ 9146 0x00 0x00 NOPX
+ 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312
+.delay_slot
+ 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 9174 0x00 0x07 0xc0 0xcb 0x80 0x44 MOVXM p0, #509376
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 9212 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509376
+ 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509376
+ 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012
+ 9268 0x00 0x00 NOPX
+ 9270 0x00 0x00 NOPX
+ 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328
+.delay_slot
+ 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 9284 0x00 0x00 NOPX
+.delay_slot
+ 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 9358 0x00 0x00 NOPX
+ 9360 0x00 0x00 NOPX
+ 9362 0x00 0x00 NOPX
+ 9364 0x00 0x00 NOPX
+ 9366 0x00 0x00 NOPX
+ 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 9372 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 9380 0x00 0x00 NOPX
+ 9382 0x00 0x00 NOPX
+ 9384 0x00 0x00 NOPX
+ 9386 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 9396 0x00 0x00 NOPX
+ 9398 0x00 0x00 NOPX
+ 9400 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 9410 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 9418 0x00 0x00 NOPX
+.no_stack_arguments
+ 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848
+.delay_slot
+.swstall delay_slot
+ 9426 0x00 0x00 NOPX
+.delay_slot
+ 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 9432 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 9466 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 9470 0x00 0x00 NOPX
+ 9472 0x00 0x00 NOPX
+ 9474 0x00 0x00 NOPX
+ 9476 0x00 0x00 NOPX
+ 9478 0x00 0x00 NOPX
+ 9480 0x14 0x51 0x08 0x18 REL r17, r16
+ 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 9494 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 9498 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 9502 0x00 0x00 NOPX
+ 9504 0x00 0x00 NOPX
+ 9506 0x00 0x00 NOPX
+ 9508 0x00 0x00 NOPX
+ 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552
+.delay_slot
+.swstall delay_slot
+ 9528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9534 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9536 0x00 0x00 NOPX
+ 9538 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 9560 0x00 0x00 NOPX
+ 9562 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 9586 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9588 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9590 0x00 0x00 NOPX
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 9600
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0
+ 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128
+ 9614 0x00 0x00 NOPX
+ 9616 0x00 0x00 NOPX
+ 9618 0x00 0x00 NOPX
+ 9620 0x00 0x00 NOPX
+ 9622 0x00 0x00 NOPX
+ 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 9632 0x00 0x00 NOPX
+ 9634 0x00 0x00 NOPX
+ 9636 0x00 0x00 NOPX
+ 9638 0x00 0x00 NOPX
+ 9640 0x00 0x00 NOPX
+ 9642 0x00 0x00 NOPX
+ 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 9652 0x00 0x00 NOPX
+ 9654 0x00 0x00 NOPX
+ 9656 0x00 0x00 NOPX
+ 9658 0x00 0x00 NOPX
+ 9660 0x00 0x00 NOPX
+ 9662 0x00 0x00 NOPX
+ 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4]
+ 9672 0x00 0x00 NOPX
+ 9674 0x00 0x00 NOPX
+ 9676 0x00 0x00 NOPX
+ 9678 0x00 0x00 NOPX
+ 9680 0x00 0x00 NOPX
+ 9682 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27
+.delay_slot
+.swstall delay_slot
+ 9714 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9728
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+.no_stack_arguments
+ 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600
+.delay_slot
+ 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4]
+.delay_slot
+ 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+ 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8]
+ 9764 0x00 0x00 NOPX
+ 9766 0x00 0x00 NOPX
+ 9768 0x00 0x00 NOPX
+ 9770 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 0x10 0x20 0x09 0x18 MOVX r16, #2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9808
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function_start
+ 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3
+ 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14
+ 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2
+ 9820 0x03 0x04 0x96 0x98 LDA r4, [p3]
+ 9824 0x00 0x00 NOPX
+ 9826 0x00 0x00 NOPX
+ 9828 0x00 0x00 NOPX
+ 9830 0x00 0x00 NOPX
+ 9832 0x10 0x06 0x09 0x18 MOVX r3, #2
+ 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4
+ 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000
+.delay_slot
+ 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+ 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7
+.delay_slot
+ 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp
+.delay_slot
+ 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7
+.delay_slot
+ 9874 0x0f 0x04 0x13 0x18 VST x0, [p7]
+ 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2
+ 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64
+ 9894 0x00 0x00 NOPX
+ 9896 0x00 0x00 NOPX
+ 9898 0x00 0x00 NOPX
+ 9900 0x00 0x00 NOPX
+ 9902 0x00 0x00 NOPX
+ 9904 0x00 0x00 NOPX
+ 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952
+.delay_slot
+ 9912 0x18 0x00 0x00 0xb8 MOV m0, #0
+.delay_slot
+ 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 9922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9926 0x00 0x00 NOPX
+ 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968
+.delay_slot
+.swstall delay_slot
+ 9938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9944 0x00 0x00 NOPX
+.delay_slot
+ 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+ 9952 0x19 0x00 0x80 0xb8 MOV m1, #64
+ 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128
+.delay_slot
+ 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9982 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9984 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9986 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+ 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3
+ 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4
+ 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048
+.delay_slot
+ 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216
+.delay_slot
+ 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 10026 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10028 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10030 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 0x18 0x00 0x80 0xb8 MOV m0, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 0x19 0x00 0x00 0xb8 MOV m1, #0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0
+ 10096 0x00 0x00 NOPX
+ 10098 0x00 0x00 NOPX
+ 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0]
+ 10104 0x00 0x00 NOPX
+ 10106 0x00 0x00 NOPX
+ 10108 0x00 0x00 NOPX
+ 10110 0x00 0x00 NOPX
+ 10112 0x00 0x00 NOPX
+ 10114 0x00 0x00 NOPX
+ 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0
+ 10120 0x00 0x00 NOPX
+ 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+ 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1
+.delay_slot
+.swstall delay_slot
+ 10334 0x00 0x00 NOPX
+.delay_slot
+ 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 10340 0x00 0x00 NOPX
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+
+.text_segment PM 10352
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function_start
+ 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr
+ 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1
+ 10368 0x00 0x00 NOPX
+ 10370 0x00 0x00 NOPX
+ 10372 0x00 0x00 NOPX
+ 10374 0x00 0x00 NOPX
+ 10376 0x00 0x00 NOPX
+ 10378 0x00 0x00 NOPX
+ 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448
+.delay_slot
+ 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+.delay_slot
+ 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp
+.delay_slot
+ 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64
+.delay_slot
+ 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1]
+.delay_slot
+.swstall delay_slot
+ 10404 0x00 0x00 NOPX
+.no_stack_arguments
+ 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+.swstall delay_slot
+ 10412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10416 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10418 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.return_address
+ 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+.swstall delay_slot
+ 10438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10444 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10446 0x00 0x00 NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.no_stack_arguments
+ 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+ 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0
+.delay_slot
+.swstall delay_slot
+ 10462 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.return_address
+ 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0
+ 10484 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10494 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10500 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 10512
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 10552 0x00 0x00 NOPX
+ 10554 0x00 0x00 NOPX
+ 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720
+.delay_slot
+ 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 10620 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184
+ 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184
+ 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 10676 0x00 0x00 NOPX
+ 10678 0x00 0x00 NOPX
+ 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736
+.delay_slot
+ 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 10692 0x00 0x00 NOPX
+.delay_slot
+ 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 10766 0x00 0x00 NOPX
+ 10768 0x00 0x00 NOPX
+ 10770 0x00 0x00 NOPX
+ 10772 0x00 0x00 NOPX
+ 10774 0x00 0x00 NOPX
+ 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 10780 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 10788 0x00 0x00 NOPX
+ 10790 0x00 0x00 NOPX
+ 10792 0x00 0x00 NOPX
+ 10794 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 10804 0x00 0x00 NOPX
+ 10806 0x00 0x00 NOPX
+ 10808 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 10818 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 10826 0x00 0x00 NOPX
+.no_stack_arguments
+ 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352
+.delay_slot
+.swstall delay_slot
+ 10834 0x00 0x00 NOPX
+.delay_slot
+ 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 10840 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 10874 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 10878 0x00 0x00 NOPX
+ 10880 0x00 0x00 NOPX
+ 10882 0x00 0x00 NOPX
+ 10884 0x00 0x00 NOPX
+ 10886 0x00 0x00 NOPX
+ 10888 0x14 0x51 0x08 0x18 REL r17, r16
+ 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 10902 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 10906 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 10910 0x00 0x00 NOPX
+ 10912 0x00 0x00 NOPX
+ 10914 0x00 0x00 NOPX
+ 10916 0x00 0x00 NOPX
+ 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960
+.delay_slot
+.swstall delay_slot
+ 10936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10944 0x00 0x00 NOPX
+ 10946 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 10968 0x00 0x00 NOPX
+ 10970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10994 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10996 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10998 0x00 0x00 NOPX
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 11008
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 11008 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0
+ 11018 0x17 0x80 0x01 0x18 MOVX r0, #-128
+ 11022 0x00 0x00 NOPX
+ 11024 0x00 0x00 NOPX
+ 11026 0x00 0x00 NOPX
+ 11028 0x00 0x00 NOPX
+ 11030 0x00 0x00 NOPX
+ 11032 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11036 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11040 0x00 0x00 NOPX
+ 11042 0x00 0x00 NOPX
+ 11044 0x00 0x00 NOPX
+ 11046 0x00 0x00 NOPX
+ 11048 0x00 0x00 NOPX
+ 11050 0x00 0x00 NOPX
+ 11052 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11056 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 11060 0x00 0x00 NOPX
+ 11062 0x00 0x00 NOPX
+ 11064 0x00 0x00 NOPX
+ 11066 0x00 0x00 NOPX
+ 11068 0x00 0x00 NOPX
+ 11070 0x00 0x00 NOPX
+ 11072 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11076 0x01 0x14 0x76 0x98 LDA r3, [p1, #4]
+ 11080 0x00 0x00 NOPX
+ 11082 0x00 0x00 NOPX
+ 11084 0x00 0x00 NOPX
+ 11086 0x00 0x00 NOPX
+ 11088 0x00 0x00 NOPX
+ 11090 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 0x08 0x4c 0x71 0x98 ST r3, [p0], #16
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 0x10 0xc2 0x14 0x98 AND r1, r3, r1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 0x10 0x76 0x27 0x98 EQ r27, r1, r2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27
+.delay_slot
+.swstall delay_slot
+ 11122 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 11136
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 11136 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11142 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+.no_stack_arguments
+ 11146 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008
+.delay_slot
+ 11152 0x18 0x17 0xa0 0xf8 MOV r0, r15
+.delay_slot
+ 11156 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4]
+.delay_slot
+ 11160 0x1b 0xd0 0xc0 0xf8 MOV r15, p0
+.delay_slot
+.swstall delay_slot
+ 11164 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11166 0x00 0x00 NOPX
+.return_address
+ 11168 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16
+ 11178 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3
+ 11188 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128
+ 11198 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0]
+ 11202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 0x10 0x22 0x05 0x18 MOVX r17, #1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 0x14 0x77 0x27 0x98 EQ r27, r17, r18
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27
+.delay_slot
+.swstall delay_slot
+ 11232 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 11248
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.tail_call
+.function_start
+ 11248 0x00 0x13 0x28 0x00 0x00 0x84 J #9808
+.delay_slot
+.swstall delay_slot
+ 11254 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11256 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11258 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11260 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11262 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function_start
+ 11264 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 11270 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 11274 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 11278 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 11282 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 11286 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11296
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 11296 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11300 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11306 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 11310 0x00 0x00 NOPX
+ 11312 0x00 0x00 NOPX
+ 11314 0x00 0x00 NOPX
+ 11316 0x00 0x00 NOPX
+ 11318 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11322 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11326 0x00 0x00 NOPX
+ 11328 0x00 0x00 NOPX
+ 11330 0x00 0x00 NOPX
+ 11332 0x00 0x00 NOPX
+ 11334 0x00 0x00 NOPX
+ 11336 0x00 0x00 NOPX
+ 11338 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11342 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 11346 0x00 0x00 NOPX
+ 11348 0x00 0x00 NOPX
+ 11350 0x00 0x00 NOPX
+ 11352 0x00 0x00 NOPX
+ 11354 0x00 0x00 NOPX
+ 11356 0x00 0x00 NOPX
+ 11358 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11362 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 11366 0x00 0x00 NOPX
+ 11368 0x00 0x00 NOPX
+.no_stack_arguments
+ 11370 0x00 0x16 0x00 0x00 0x01 0x04 JL #11264
+.delay_slot
+ 11376 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 11380 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11382 0x00 0x00 NOPX
+.delay_slot
+ 11384 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 11388 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.return_address
+ 11392 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 11396 0x00 0x00 NOPX
+ 11398 0x00 0x00 NOPX
+ 11400 0x00 0x00 NOPX
+ 11402 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 11440
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function_start
+ 11440 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0xb0 0x10 0xba MOVA m0, #32; MOVXM ls, #11616
+ 11450 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0xb8 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11632
+ 11460 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828
+ 11470 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032
+ 11480 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4]
+ 11484 0x00 0x00 NOPX
+ 11486 0x00 0x00 NOPX
+ 11488 0x00 0x00 NOPX
+ 11490 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1
+ 11494 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11616 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11648 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 11722 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+ 11726 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 11730 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11744
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function_start
+ 11744 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 11750 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID
+ 11756 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11762 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2
+ 11772 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15
+ 11780 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4]
+ 11784 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16]
+ 11788 0x00 0x00 NOPX
+ 11790 0x80 0x17 0x50 0x40 0x01 0x84 JNZ r16, #11936
+.delay_slot
+ 11796 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 11800 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+.delay_slot
+ 11806 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7
+.delay_slot
+ 11814 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 11818 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509312
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 0x00 0x16 0x10 0x00 0x01 0x04 JL #11296
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 11864 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 11868 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.return_address
+ 11872 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008
+ 11882 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012
+ 11892 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020
+ 11902 0x00 0x00 NOPX
+ 11904 0x00 0x00 NOPX
+ 11906 0x00 0x00 NOPX
+ 11908 0x00 0x17 0x58 0x00 0x00 0x84 J #11952
+.delay_slot
+ 11914 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+ 11920 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 11924 0x0a 0x06 0x51 0x98 ST r18, [p2]
+.delay_slot
+ 11928 0x0b 0x06 0x11 0x98 ST r16, [p3]
+.delay_slot
+ 11932 0x09 0x06 0x11 0x98 ST r16, [p1]
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+ 11936 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012
+ 11942 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+ 11952 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12
+ 11956 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992
+ 11966 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4
+ 11970 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4
+ 11974 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 11978 0x00 0x46 0x76 0x98 LDA r19, [p0, #16]
+ 11982 0x00 0x00 NOPX
+ 11984 0x00 0x00 NOPX
+ 11986 0x00 0x00 NOPX
+ 11988 0x00 0x00 NOPX
+ 11990 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 11994 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1
+ 12000 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 12004 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 12008 0x00 0x00 NOPX
+ 12010 0x00 0x00 NOPX
+ 12012 0x00 0x00 NOPX
+ 12014 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 12018 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12
+ 12022 0x00 0x00 NOPX
+ 12024 0x00 0x00 NOPX
+ 12026 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 12030 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 12034 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+ 12038 0x02 0x56 0x76 0x98 LDA r19, [p2, #20]
+ 12042 0x00 0x00 NOPX
+ 12044 0x00 0x00 NOPX
+ 12046 0x00 0x00 NOPX
+ 12048 0x00 0x00 NOPX
+ 12050 0x00 0x00 NOPX
+ 12052 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+ 12056 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 12060 0x00 0x00 NOPX
+ 12062 0x00 0x00 NOPX
+ 12064 0x00 0x00 NOPX
+ 12066 0x00 0x00 NOPX
+ 12068 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 12072 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6
+ 12082 0x00 0x00 NOPX
+ 12084 0x00 0x00 NOPX
+ 12086 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20]
+ 12090 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20]
+ 12096 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 0x04 0x06 0x76 0x98 LDA r19, [p4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+.delay_slot
+ 12132 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16
+.delay_slot
+ 12136 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16
+.delay_slot
+ 12140 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16
+.delay_slot
+ 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV
+.return_address
+ 12160 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15
+ 12170 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024
+ 12176 0x00 0x00 NOPX
+ 12178 0x00 0x00 NOPX
+ 12180 0x00 0x00 NOPX
+ 12182 0x00 0x00 NOPX
+ 12184 0x00 0x00 NOPX
+ 12186 0x14 0x51 0x08 0x18 REL r17, r16
+ 12190 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4]
+ 12194 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20]
+ 12198 0x00 0x00 NOPX
+ 12200 0x00 0x00 NOPX
+ 12202 0x00 0x00 NOPX
+ 12204 0x00 0x00 NOPX
+ 12206 0x00 0x00 NOPX
+ 12208 0x14 0x23 0x11 0x98 SUB r17, r16, r17
+ 12212 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4]
+ 12218 0x00 0x00 NOPX
+ 12220 0x00 0x00 NOPX
+ 12222 0x00 0x00 NOPX
+ 12224 0x00 0x00 NOPX
+ 12226 0x00 0x00 NOPX
+ 12228 0x00 0x00 NOPX
+ 12230 0x14 0x51 0x08 0x18 REL r17, r16
+ 12234 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992
+ 12244 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 12248 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 12252 0x00 0x00 NOPX
+ 12254 0x00 0x00 NOPX
+ 12256 0x00 0x00 NOPX
+ 12258 0x00 0x00 NOPX
+ 12260 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 12264 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 12268 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 12272 0x80 0x18 0x08 0x40 0x01 0x84 JNZ r16, #12304
+.delay_slot
+.swstall delay_slot
+ 12278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12282 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12284 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12286 0x00 0x00 NOPX
+ 12288 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+ 12304 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16]
+ 12308 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+ 12312 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 0x0e 0x8e 0x0b 0x18 MOVS p6, r14
+.delay_slot
+ 12334 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 12340 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12342 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12344 0x00 0x00 NOPX
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+
+.text_segment PM 12352
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function_start
+ 12352 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509888
+ 12362 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0
+ 12372 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 12378 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xe0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509888
+ 12388 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 12392 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8]
+ 12396 0x00 0x00 NOPX
+ 12398 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 12402 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 12406 0x00 0x04 0x2e 0x98 LDA el0, [p0]
+ 12410 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4]
+ 12414 0x00 0x00 NOPX
+ 12416 0x00 0x00 NOPX
+ 12418 0x00 0x00 NOPX
+ 12420 0x00 0x00 NOPX
+ 12422 0x00 0x00 NOPX
+ 12424 0x09 0x04 0x29 0x98 ST el0, [p1]
+ 12428 0x09 0x14 0x09 0x98 ST eh0, [p1, #4]
+ 12432 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5
+ 12436 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2
+ 12440 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2
+ 12444 0x00 0x00 NOPX
+ 12446 0x00 0x00 NOPX
+ 12448 0x00 0x00 NOPX
+ 12450 0x00 0x00 NOPX
+.no_stack_arguments
+ 12452 0x00 0x1e 0x98 0x00 0x01 0x04 JL #15664
+.delay_slot
+ 12458 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18
+.delay_slot
+ 12464 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16
+.delay_slot
+ 12470 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24
+.delay_slot
+ 12476 0x16 0x22 0xf1 0x98 SUB r17, r24, r15
+.delay_slot
+ 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV
+.return_address
+ 12496 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0
+ 12502 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2
+ 12508 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2
+ 12512 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20]
+ 12516 0x00 0x00 NOPX
+ 12518 0x00 0x00 NOPX
+ 12520 0x00 0x00 NOPX
+ 12522 0x13 0xe9 0x46 0x98 XOR r20, r15, r20
+ 12526 0x15 0x37 0x0a 0x98 LT r27, r20, r16
+ 12530 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19
+.no_stack_arguments
+ 12536 0xfc 0x46 0xb0 0x00 0x07 0xa6 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #15664
+.delay_slot
+ 12546 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27
+.delay_slot
+ 12550 0x14 0x77 0x0a 0x98 LT r27, r17, r16
+.delay_slot
+ 12554 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+.delay_slot
+ 12558 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20
+.delay_slot
+ 12562 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1
+.return_address
+ 12576 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66
+ 12586 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508
+ 12596 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2
+ 12606 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23
+ 12616 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32
+ 12626 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1
+ 12636 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64
+ 12646 0x16 0x28 0x21 0x98 SUB r20, r24, r2
+ 12650 0x10 0xc7 0x06 0x98 XOR r3, r3, r16
+ 12654 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2
+ 12660 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3
+ 12666 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7
+ 12672 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20
+ 12676 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1
+ 12682 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1
+ 12688 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0
+ 12694 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1
+ 12700 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16
+ 12704 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17
+ 12708 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1
+ 12714 0x11 0x37 0x07 0x98 EQ r27, r4, r16
+ 12718 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6
+ 12724 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27
+ 12728 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18
+ 12732 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1
+ 12738 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23
+ 12744 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0
+ 12750 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23
+ 12756 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29
+ 12762 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26
+ 12768 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28
+ 12774 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27
+ 12780 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22
+ 12784 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1
+ 12790 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22
+ 12796 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1
+ 12802 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30
+ 12808 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26
+ 12814 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2
+ 12820 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22
+ 12826 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30
+ 12832 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29
+ 12838 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1
+ 12848 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20
+ 12854 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22
+ 12860 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64
+ 12872 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4
+ 12876 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23
+ 12882 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12888 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23
+ 12894 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12900 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4
+ 12904 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4
+ 12908 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0
+ 12912 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7]
+ 12916 0x00 0x00 NOPX
+ 12918 0x00 0x00 NOPX
+ 12920 0x00 0x00 NOPX
+ 12922 0x00 0x00 NOPX
+ 12924 0x00 0x00 NOPX
+ 12926 0x00 0x00 NOPX
+ 12928 0x80 0x19 0x50 0x00 0x01 0x84 JZ r16, #12960
+.delay_slot
+ 12934 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn
+.delay_slot
+ 12938 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144
+.delay_slot
+.swstall delay_slot
+ 12944 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12946 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12948 0x00 0x00 NOPX
+ 12950 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+ 12960 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032
+ 12970 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19
+ 12976 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186
+ 12982 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0
+ 12988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 12994 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0
+.delay_slot
+.swstall delay_slot
+ 13020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13022 0x00 0x00 NOPX
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function_start
+ 13024 0x1c 0x56 0xc0 0xf8 MOV r17, p3
+ 13028 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106
+ 13034 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2
+ 13040 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7
+ 13044 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4
+ 13048 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12
+ 13052 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4
+ 13056 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8
+ 13060 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8
+ 13064 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4
+ 13068 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12
+ 13072 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4
+ 13076 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8
+ 13080 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8
+ 13084 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4
+ 13088 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12
+ 13092 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4
+ 13096 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8
+ 13100 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8
+ 13104 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4
+ 13108 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8
+ 13112 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032
+ 13122 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1
+ 13128 0x1b 0x0f 0x10 0xb8 MOV m3, #-120
+ 13132 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0
+ 13144 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128
+ 13158 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2
+ 13168 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1
+ 13178 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112
+ 13188 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16
+ 13194 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0xf8 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296
+ 13206 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xba 0x28 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392
+ 13218 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0
+ 13224 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0
+ 13228 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb2 0x48 0x10 0xba LDA r5, [p3]; MOVXM p3, #13456
+ 13238 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0
+ 13248 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV
+ 13264 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13366 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0
+ 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13408 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.loop_nesting 1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13456 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 0x00 0x00 0x01 0xb7 0x54 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 0x80 0x85 0x70 0x00 0x01 0x8f 0x4f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 13552 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13606 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13614 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13622 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0
+ 13628 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 1
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13648 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13696 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13700 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17
+ 13704 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17
+ 13708 0x00 0x00 NOPX
+ 13710 0x00 0x00 NOPX
+ 13712 0x00 0x00 NOPX
+ 13714 0x00 0x00 NOPX
+ 13716 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+ 13720 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr
+.delay_slot
+ 13726 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.delay_slot
+ 13730 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.delay_slot
+ 13734 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0
+.delay_slot
+ 13738 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4
+.delay_slot
+ 13742 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+
+.text_segment PM 13760
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 13760 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 13766 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15
+ 13772 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 13778 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID
+ 13786 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr
+ 13794 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20]
+ 13798 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12]
+ 13802 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2
+ 13810 0x80 0x1b 0x38 0x40 0x01 0x84 JNZ r16, #13936
+.delay_slot
+ 13816 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 13820 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 13824 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 13828 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008
+.delay_slot
+ 13834 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 0x00 0x18 0x20 0x00 0x01 0x04 JL #12352
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 13876 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 13880 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM
+.return_address
+ 13888 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8
+ 13896 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 13900 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024
+ 13910 0x00 0x00 NOPX
+ 13912 0x00 0x1b 0x40 0x00 0x00 0x84 J #13952
+.delay_slot
+ 13918 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016
+.delay_slot
+.swstall delay_slot
+ 13924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13926 0x00 0x00 NOPX
+.delay_slot
+ 13928 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.delay_slot
+ 13932 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+ 13936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+ 13952 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 13956 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008
+ 13966 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 13970 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 13974 0x02 0x46 0x56 0x98 LDA r18, [p2, #16]
+ 13978 0x00 0x00 NOPX
+ 13980 0x00 0x00 NOPX
+ 13982 0x00 0x00 NOPX
+ 13984 0x00 0x00 NOPX
+ 13986 0x00 0x00 NOPX
+ 13988 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 13992 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 13996 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 14000 0x00 0x00 NOPX
+ 14002 0x00 0x00 NOPX
+ 14004 0x00 0x00 NOPX
+ 14006 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 14010 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992
+ 14020 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp
+ 14026 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76
+ 14030 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2
+ 14036 0x04 0x06 0x36 0x98 LDA r17, [p4]
+ 14040 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509888
+ 14050 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 14054 0x00 0x00 NOPX
+ 14056 0x00 0x00 NOPX
+ 14058 0x00 0x00 NOPX
+ 14060 0x05 0x06 0x76 0x98 LDA r19, [p5]
+ 14064 0x00 0x00 NOPX
+ 14066 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 14070 0x14 0xa2 0x07 0x18 ADD r17, r18, #1
+ 14074 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15
+.no_stack_arguments
+ 14078 0x00 0x19 0x70 0x00 0x01 0x04 JL #13024
+.delay_slot
+ 14084 0x0f 0x06 0x31 0x98 ST r17, [p7]
+.delay_slot
+ 14088 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16
+.delay_slot
+ 14092 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76]
+.delay_slot
+ 14096 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72]
+.delay_slot
+ 14100 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX
+.return_address
+ 14112 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20
+ 14116 0x02 0x06 0x16 0x98 LDA r16, [p2]
+ 14120 0x00 0x00 NOPX
+ 14122 0x00 0x00 NOPX
+ 14124 0x00 0x00 NOPX
+ 14126 0x00 0x00 NOPX
+ 14128 0x00 0x00 NOPX
+ 14130 0x00 0x00 NOPX
+ 14132 0x14 0x10 0xf8 0x18 REL r16, r15
+ 14136 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024
+ 14146 0x01 0x06 0x56 0x98 LDA r18, [p1]
+ 14150 0x07 0x06 0x36 0x98 LDA r17, [p7]
+ 14154 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12]
+ 14158 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+ 14162 0x00 0x00 NOPX
+ 14164 0x00 0x00 NOPX
+ 14166 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 14170 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8]
+ 14174 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 14178 0x80 0x1b 0xc0 0x40 0x01 0x84 JNZ r16, #14208
+.delay_slot
+ 14184 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 14188 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14190 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14192 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14194 0x00 0x00 NOPX
+ 14196 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 14208 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13
+ 14214 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16]
+ 14218 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 14222 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 14228 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14232 0x00 0x00 NOPX
+.delay_slot
+ 14234 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 14240
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function_start
+ 14240 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 14246 0xff 0x73 0xb0 0x00 0x01 0xf3 0xb2 0x20 0x11 0x3a ST p7, [sp, #-8]; MOVXM p7, #508992
+ 14256 0xe0 0xc2 0xd7 0xff 0x1d 0x82 0x2d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID
+ 14266 0x0f 0xf6 0x1d 0x98 ST p4, [sp, #-12]
+ 14270 0x0f 0xf1 0x1d 0x98 ST p2, [sp, #-16]
+ 14274 0xfd 0x87 0xb0 0x03 0xb3 0x60 0x70 0x02 ST lr, [sp, #-20]; MOV p7, p3
+ 14282 0x00 0x00 NOPX
+ 14284 0x00 0x00 NOPX
+ 14286 0x00 0x00 NOPX
+ 14288 0x80 0x1c 0xb0 0x40 0x01 0x84 JNZ r16, #14688
+.delay_slot
+ 14294 0x0f 0xe8 0x1d 0x98 ST p0, [sp, #-24]
+.delay_slot
+ 14298 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 14302 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 14306 0x00 0x07 0xcc 0xc8 0xa0 0x44 MOVXM p6, #509008
+.delay_slot
+ 14312 0x0e 0x06 0x31 0x98 ST r17, [p6]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 0x00 0x20 0x00 0x00 0x01 0xf3 0x32 0x34 0x10 0xba MOVA r0, #1; MOVXM p6, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 0xc0 0xc0 0xe6 0x84 0x8b 0x00 0x01 0xf0 0x32 0x32 0x10 0x76 ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 0x00 0x01 0x00 0x00 0x01 0xf0 0xb3 0x00 0x10 0xba MOVA r1, #0; MOVXM p1, #509440
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 14362 0x00 0x2c 0xf0 0x40 0x0a 0x2c NOPA; MOVX r16, #1
+.delay_slot
+ 14368 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV
+.return_address
+ 14384 0x04 0x00 0xa1 0x01 0x01 0x64 MOVX r16, #1; MOV dj0, #64
+ 14390 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 14394 0x00 0x00 NOPX
+ 14396 0x00 0x00 NOPX
+ 14398 0x00 0x00 NOPX
+ 14400 0x00 0x00 NOPX
+ 14402 0x00 0x00 NOPX
+ 14404 0x00 0x00 NOPX
+ 14406 0x14 0xa1 0x07 0x98 EQ r16, r18, r16
+ 14410 0x80 0x1c 0x68 0x40 0x01 0x84 JNZ r16, #14544
+.delay_slot
+ 14416 0x1c 0x5e 0xc0 0xf8 MOV r17, p7
+.delay_slot
+ 14420 0x18 0xc8 0x90 0x18 ADD.NC dc0, r17, #32
+.delay_slot
+.swstall delay_slot
+ 14424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14426 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14428 0x00 0x00 NOPX
+ 14430 0x90 0x1c 0x58 0x40 0x01 0x84 JNZ r18, #14512
+.delay_slot
+ 14436 0x00 0x07 0xc8 0x2c 0x00 0x44 MOVXM r16, #509440
+.delay_slot
+ 14442 0x10 0x22 0x01 0x18 MOVX r17, #0
+.delay_slot
+.swstall delay_slot
+ 14446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14450 0x00 0x00 NOPX
+.no_stack_arguments
+ 14452 0xfc 0xe3 0xb0 0x00 0x05 0x70 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11136
+.delay_slot
+ 14462 0x00 0x07 0xcc 0xca 0x80 0x44 MOVXM p6, #509248
+.delay_slot
+ 14468 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248
+.delay_slot
+ 14474 0x19 0x61 0x80 0xf8 MOV p1, dc0
+.delay_slot
+.swstall delay_slot
+ 14478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.return_address
+ 14496 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440
+ 14506 0xfc 0xe3 0x20 0x00 0x20 0x3c LDA p6, [sp, #-28]; NOPB
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 0x00 0x00 NOPX
+ 14514 0x00 0x1c 0x80 0x00 0x00 0x84 J #14592
+.delay_slot
+.swstall delay_slot
+ 14520 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14526 0x00 0x00 NOPX
+.delay_slot
+ 14528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xb6 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.no_stack_arguments
+ 14544 0xfc 0xe3 0xb0 0x00 0x05 0x84 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11296
+.delay_slot
+ 14554 0x00 0x07 0xcc 0xcb 0x00 0x44 MOVXM p6, #509312
+.delay_slot
+ 14560 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312
+.delay_slot
+ 14566 0x19 0x61 0x80 0xf8 MOV p1, dc0
+.delay_slot
+.swstall delay_slot
+ 14570 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14572 0x00 0x01 0x67 0x98 NOPA
+.return_address
+ 14576 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440
+ 14586 0xfc 0x93 0x20 0x00 0x20 0x3c LDA p1, [sp, #-28]; NOPB
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 0x1b 0x68 0x05 0x98 ADD.NC p3, r16, #11
+ 14596 0x6f 0xcd 0x50 0x00 0x01 0xf3 0x32 0x28 0x10 0xba LDA.u8 r19, [p3], #7; MOVXM p6, #509008
+ 14606 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 14610 0x03 0x1e 0xba 0x98 LDA.u16 r21, [p3], #2
+ 14614 0x03 0x06 0x1a 0x98 LDA.u16 r16, [p3]
+ 14618 0x00 0x00 NOPX
+ 14620 0x03 0x16 0x9a 0x98 LDA.u16 r20, [p3, #2]
+ 14624 0x00 0x00 NOPX
+ 14626 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 0x00 0x07 0xc0 0xc8 0x88 0x44 MOVXM p0, #508996
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 0x14 0xe7 0x5f 0x98 MUL r19, r19, r21
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 0x00 0xce 0x30 0x00 0x01 0xf1 0x32 0x2e 0x11 0x3a ST r19, [p0]; MOVXM p2, #509020
+ 14648 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16
+ 14652 0x14 0x63 0x2f 0x98 MUL r17, r17, r18
+ 14656 0x15 0x21 0x0f 0x98 MUL r16, r20, r16
+ 14660 0x00 0x2c 0xf2 0x06 0x31 0x80 0x01 0xf3 0x32 0x30 0x10 0x76 NOPA; ST r17, [p2]; MOVXM p6, #509024
+ 14672 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+ 14688 0x00 0x07 0xc0 0xc8 0x90 0x44 MOVXM p0, #509000
+ 14694 0x00 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r16, [p0]; MOVXM p2, #508992
+ 14704 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x26 0x10 0xba LDA r17, [p2]; MOVXM p6, #509004
+ 14714 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 14718 0x00 0x00 NOPX
+ 14720 0x00 0x00 NOPX
+ 14722 0x00 0x00 NOPX
+ 14724 0x00 0x00 NOPX
+ 14726 0x80 0x1c 0xf8 0x40 0x01 0x84 JNZ r16, #14832
+.delay_slot
+ 14732 0x8c 0x40 0xe9 0xb0 0x01 0x24 ADD r17, r17, #1; ADD.NC r19, r16, #1
+.delay_slot
+ 14738 0x14 0xa4 0x07 0x18 ADD r18, r18, #1
+.delay_slot
+ 14742 0x0a 0x06 0x31 0x98 ST r17, [p2]
+.delay_slot
+ 14746 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+ 14750 0x08 0x06 0x71 0x98 ST r19, [p0]
+ 14754 0x07 0xf6 0x31 0x18 LDA r17, [sp, #-12]
+ 14758 0x00 0x00 NOPX
+ 14760 0x00 0x00 NOPX
+ 14762 0x00 0x00 NOPX
+ 14764 0x00 0x00 NOPX
+ 14766 0x00 0x00 NOPX
+ 14768 0x00 0x00 NOPX
+ 14770 0x1e 0x68 0x86 0x18 ADD.NC p6, r17, #12
+ 14774 0x06 0xff 0x76 0x98 LDA r27, [p6], #-4
+ 14778 0x06 0xfe 0x36 0x98 LDA r17, [p6], #-4
+ 14782 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 0x06 0x46 0x36 0x98 LDA r17, [p6, #16]
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 0xc0 0xc6 0x3f 0xc1 0xfa 0x5c ST r17, [p6]; MOVX r16, #-1
+ 14810 0x00 0x00 NOPX
+ 14812 0x00 0x00 NOPX
+ 14814 0x00 0x00 NOPX
+ 14816 0x00 0x00 NOPX
+ 14818 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 0x00 0x00 NOPX
+ 14834 0x00 0x00 NOPX
+ 14836 0x00 0x00 NOPX
+ 14838 0x07 0xf5 0x19 0x18 LDA p2, [sp, #-12]
+ 14842 0x07 0xe8 0x19 0x18 LDA p0, [sp, #-24]
+.no_stack_arguments
+ 14846 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464
+.delay_slot
+ 14852 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440
+.delay_slot
+.swstall delay_slot
+ 14858 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14860 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14862 0x00 0x00 NOPX
+.delay_slot
+ 14864 0x00 0x2c 0xf0 0x00 0x26 0x88 0x8b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV
+.return_address
+ 14880 0xfe 0x42 0x20 0x00 0x01 0xf0 0xb2 0x24 0x10 0xba LDA r16, [sp, #-16]; MOVXM p1, #509000
+ 14890 0x20 0xc6 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r17, [p1]; MOVXM p1, #508996
+ 14900 0x01 0x06 0x56 0x98 LDA r18, [p1]
+ 14904 0x00 0x00 NOPX
+ 14906 0x00 0x00 NOPX
+ 14908 0x00 0x00 NOPX
+ 14910 0x00 0x00 NOPX
+ 14912 0x00 0x00 NOPX
+ 14914 0x00 0x00 NOPX
+ 14916 0x14 0x63 0x28 0x98 NE r17, r17, r18
+ 14920 0x88 0x1d 0xd0 0x40 0x01 0x84 JNZ r17, #15264
+.delay_slot
+.swstall delay_slot
+ 14926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14932 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14934 0x00 0x00 NOPX
+ 14936 0x08 0x02 0x80 0x3f 0x17 0xe8 0xb4 0x03 0x08 0xba MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12
+ 14946 0x3f 0xee 0xd0 0x00 0x01 0xf0 0x32 0x2e 0x10 0xba LDA r27, [p1], #-4; MOVXM p0, #509020
+ 14956 0x01 0xfe 0x56 0x98 LDA r18, [p1], #-4
+ 14960 0x01 0xfe 0x76 0x98 LDA r19, [p1], #-4
+ 14964 0x01 0x56 0x96 0x98 LDA r20, [p1, #20]
+ 14968 0x00 0x00 NOPX
+ 14970 0x00 0x00 NOPX
+ 14972 0x00 0x00 NOPX
+ 14974 0x00 0x00 NOPX
+ 14976 0x00 0x00 NOPX
+ 14978 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27
+ 14982 0x20 0xca 0x30 0x40 0x0a 0x5c ST r18, [p1]; MOVX r16, #1
+ 14988 0x00 0x00 NOPX
+ 14990 0x00 0x00 NOPX
+ 14992 0x00 0x00 NOPX
+ 14994 0x00 0x00 NOPX
+ 14996 0x15 0x13 0x18 0x18 ACQ r20, r17
+ 15000 0x00 0x00 NOPX
+ 15002 0x00 0x00 NOPX
+ 15004 0x00 0x00 NOPX
+ 15006 0x00 0x06 0x76 0x98 LDA r19, [p0]
+ 15010 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 15014 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 0x06 0x5c 0x1e 0x98 LDA p0, [p6], #20
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 0x29 0xc6 0xd0 0x27 0x38 0x6c 0x31 0x60 0x78 0xba LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 0x14 0xa1 0x07 0x98 EQ r16, r18, r16
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 0x80 0x1d 0x88 0x40 0x01 0x84 JNZ r16, #15120
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 0x0f 0x80 0x8b 0x18 MOVS p7, p0
+.delay_slot
+.swstall delay_slot
+ 15050 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15052 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15054 0x00 0x00 NOPX
+.delay_slot
+ 15056 0xfe 0x13 0xb0 0x00 0xb4 0xe2 0xa0 0x02 ST p1, [sp, #-16]; ADD.NC p1, r19, r17
+ 15064 0x90 0x1d 0x98 0x40 0x01 0x84 JNZ r18, #15152
+.delay_slot
+.swstall delay_slot
+ 15070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15072 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15074 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15076 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15078 0x00 0x00 NOPX
+.no_stack_arguments
+ 15080 0x00 0x15 0xf8 0x00 0x01 0x04 JL #11248
+.delay_slot
+ 15086 0x00 0x07 0xc6 0xca 0x80 0x44 MOVXM p3, #509248
+.delay_slot
+.swstall delay_slot
+ 15092 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15094 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15096 0x00 0x00 NOPX
+.delay_slot
+ 15098 0x00 0x2c 0xf4 0xc1 0x81 0xd4 NOPA; MOV p2, p0
+.return_address
+ 15104 0x00 0x1d 0x98 0x00 0x00 0x84 J #15152
+.delay_slot
+.swstall delay_slot
+ 15110 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15112 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15114 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15116 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15118 0x00 0x00 NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.no_stack_arguments
+ 15120 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440
+.delay_slot
+ 15126 0x00 0x07 0xc6 0xcb 0x00 0x44 MOVXM p3, #509312
+.delay_slot
+ 15132 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+.delay_slot
+.swstall delay_slot
+ 15136 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15138 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15140 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.return_address
+ 15152 0x07 0xf0 0x99 0x18 LDA p1, [sp, #-16]
+ 15156 0xfe 0x83 0x20 0x44 0x0a 0x2c LDA p0, [sp, #-12]; MOVX r17, #1
+ 15162 0xe8 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x24 0x10 0xba LDA r16, [p7, #16]; MOVXM p7, #509000
+ 15172 0x00 0x00 NOPX
+ 15174 0x00 0x00 NOPX
+ 15176 0x00 0x00 NOPX
+ 15178 0x00 0x00 NOPX
+ 15180 0x00 0x00 NOPX
+ 15182 0x00 0x00 NOPX
+ 15184 0x14 0x11 0x18 0x18 REL r16, r17
+ 15188 0x01 0xf6 0x56 0x98 LDA r18, [p1, #-4]
+ 15192 0x00 0x56 0x16 0x98 LDA r16, [p0, #20]
+ 15196 0x00 0x00 NOPX
+ 15198 0x00 0x00 NOPX
+ 15200 0x00 0x00 NOPX
+ 15202 0x00 0x00 NOPX
+ 15204 0x00 0x00 NOPX
+ 15206 0x14 0x65 0x21 0x98 SUB r18, r17, r18
+ 15210 0x09 0xf6 0x51 0x98 ST r18, [p1, #-4]
+ 15214 0x00 0x00 NOPX
+ 15216 0x00 0x00 NOPX
+ 15218 0x00 0x00 NOPX
+ 15220 0x00 0x00 NOPX
+ 15222 0x14 0x11 0x18 0x18 REL r16, r17
+ 15226 0x06 0xe6 0x56 0x98 LDA r18, [p6, #-8]
+ 15230 0x00 0x00 NOPX
+ 15232 0x00 0x00 NOPX
+ 15234 0x00 0x1d 0xd8 0x00 0x00 0x84 J #15280
+.delay_slot
+.swstall delay_slot
+ 15240 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15242 0x00 0x00 NOPX
+.delay_slot
+ 15244 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 15248 0xe0 0xc2 0x38 0xc6 0x43 0x5c ST r16, [p7]; SUB r17, r17, r18
+.delay_slot
+ 15254 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+ 15264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+ 15280 0xfd 0x87 0x20 0x00 0x01 0xf3 0xb2 0x30 0x10 0xba LDA lr, [sp, #-20]; MOVXM p7, #509024
+ 15290 0xe0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r18, [p7]; MOVXM p6, #508992
+ 15300 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 15304 0x00 0x00 NOPX
+ 15306 0x00 0x00 NOPX
+ 15308 0x00 0x00 NOPX
+ 15310 0x00 0x00 NOPX
+ 15312 0x00 0x00 NOPX
+ 15314 0x00 0x00 NOPX
+ 15316 0x14 0x63 0x28 0x98 NE r17, r17, r18
+ 15320 0x88 0x1d 0xf8 0x40 0x01 0x84 JNZ r17, #15344
+.delay_slot
+ 15326 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 15330 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15332 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15334 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15336 0x00 0x00 NOPX
+ 15338 0x00 0x2c 0xfc 0x0c 0x23 0x0c NOPA; ST r16, [p6]
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4]
+ 15348 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 15352 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 15358 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15360 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15362 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15364 0x00 0x00 NOPX
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+
+.text_segment PM 15376
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function_start
+ 15376 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15380 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 15384 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 15388 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 15392 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15396 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880
+.delay_slot
+.swstall delay_slot
+ 15402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15406 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15410 0x00 0x00 NOPX
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+
+.text_segment PM 15424
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function_start
+ 15424 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15428 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15432 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15436 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15440 0x00 0x10 0x18 0x00 0x00 0x84 J #8240
+.delay_slot
+.swstall delay_slot
+ 15446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15450 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15452 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15454 0x00 0x00 NOPX
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function_start
+ 15456 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15460 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15464 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15468 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15472 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104
+.delay_slot
+.swstall delay_slot
+ 15478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15482 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15484 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15486 0x00 0x00 NOPX
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function_start
+ 15488 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15492 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15496 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15500 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15504 0x00 0x14 0x88 0x00 0x00 0x84 J #10512
+.delay_slot
+.swstall delay_slot
+ 15510 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15512 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15514 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15516 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15518 0x00 0x00 NOPX
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function_start
+ 15520 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15524 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12
+ 15528 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8
+ 15532 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4]
+ 15536 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15540 0x00 0x16 0xf0 0x00 0x00 0x84 J #11744
+.delay_slot
+.swstall delay_slot
+ 15546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15548 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15550 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15552 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15554 0x00 0x00 NOPX
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+
+.text_segment PM 15568
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function_start
+ 15568 0x1b 0x60 0xc0 0xf8 MOV p3, p0
+ 15572 0x03 0x1c 0x1e 0x98 LDA p0, [p3], #4
+ 15576 0x03 0x1c 0x9e 0x98 LDA p1, [p3], #4
+ 15580 0x03 0x2d 0x1e 0x98 LDA p2, [p3], #8
+ 15584 0x03 0xf6 0x1e 0x98 LDA p4, [p3, #-4]
+ 15588 0x03 0x05 0x9e 0x98 LDA p3, [p3]
+.tail_call
+ 15592 0x00 0x1b 0xd0 0x00 0x00 0x84 J #14240
+.delay_slot
+.swstall delay_slot
+ 15598 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15600 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15602 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15604 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15606 0x00 0x00 NOPX
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+
+.text_segment PM 15616
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function_start
+ 15616 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15620 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 15624 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 15628 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 15632 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15636 0x00 0x1a 0xe0 0x00 0x00 0x84 J #13760
+.delay_slot
+.swstall delay_slot
+ 15642 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15646 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15648 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15650 0x00 0x00 NOPX
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+
+.text_segment PM 15664
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function_start
+ 15664 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0
+ 15670 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15674 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15678 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15682 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15686 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15690 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15694 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15698 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15702 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15706 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15710 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15714 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15718 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15722 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15726 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15730 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15734 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15738 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15742 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15746 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15750 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15754 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15758 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15762 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15766 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15770 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15774 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15778 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15782 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 15786 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15790 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15794 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15798 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15802 0x18 0x9f 0xa0 0xf8 MOV r2, r31
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+
+.bss_segment DMb 508992 32
+
+.data_segment DMb 509024
+.label _ZL8num_iter
+ 0x1
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509028 4
+
+.bss_segment DMb 509032 1
+
+.rodata_segment DMb 509056
+.label _ZL20g_uniformKernelFuncs
+ 0x10
+ 0x3c
+ 0x0
+ 0x0
+ 0x40
+ 0x3c
+ 0x0
+ 0x0
+ 0x60
+ 0x3c
+ 0x0
+ 0x0
+ 0x80
+ 0x3c
+ 0x0
+ 0x0
+ 0xa0
+ 0x3c
+ 0x0
+ 0x0
+ 0xd0
+ 0x3c
+ 0x0
+ 0x0
+ 0x0
+ 0x3d
+ 0x0
+ 0x0
+
+.bss_segment DMb 509120 1024
+
+.stack DM_stack 506560 508928
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.map
new file mode 100644
index 0000000000000000000000000000000000000000..b11a3b333f5cabeaaee231f81abbc9a33f2e051a
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.map
@@ -0,0 +1,324 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+Memory map for memory 'DM_stack':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2368
+
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+
+Memory map for memory 'DMb':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 3461
+
+ 0x00000000..0x0007babf ( 506560 items) : Reserved
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+ 0x0007c400..0x0007c43f ( 64 items) : Reserved
+ 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL9curr_iter (Data, Local, .bss.DMb.4)
+ 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11total_iters (Data, Local, .bss.DMb.4)
+ 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8core_row (Data, Local, .bss.DMb.4)
+ 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4)
+ 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4)
+ 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4)
+ 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8num_iter (Data, Local, .data.DMb.4)
+ 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4)
+ 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1)
+ 0x0007c480..0x0007c49b ( 28 items) : ../Release/0_0_reloadable5.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64)
+
+ Called functions : _Z13_b896_wrapperPPv
+ _Z13_b901_wrapperPPv
+ _Z13_b906_wrapperPPv
+ _Z13_b881_wrapperPPv
+ _Z13_b891_wrapperPPv
+ _Z13_b924_wrapperPPv
+ _Z13_b919_wrapperPPv
+
+ 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable5.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable5.o::add1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c5c0..0x0007c5ff ( 64 items) : ../Release/0_0_reloadable5.o::clip1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c600..0x0007c7bf ( 448 items) : ../Release/0_0_reloadable5.o::conv2d_params (Data, Global, .bss.DMb.64)
+ 0x0007c7c0..0x0007c8bf ( 256 items) : ../Release/0_0_reloadable5.o::conv2d_dw_params (Data, Global, .bss.DMb.64)
+ 0x0007ccc0..0x000fffff ( 537408 items) : Reserved
+
+Memory map for memory 'PM':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 13150
+
+ 0x00000000..0x0000092f ( 2352 items) : Reserved
+ 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable5.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64)
+
+ Referenced symbols: _ZL20g_uniformKernelFuncs
+
+ 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable5.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64)
+ 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable5.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable5.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable5.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ conv2d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL14num_depth_iter
+ _ZL8num_iter
+ _ZL10depth_iter
+ _ZL11total_iters
+
+ 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+
+ 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ add1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable5.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ clip1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+
+ 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable5.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+
+ 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002b00..0x00002b73 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002b80..0x00002be1 ( 98 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+
+ 0x00002bf0..0x00002bff ( 16 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+
+ Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+
+ 0x00002c00..0x00002c17 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002c20..0x00002ca9 ( 138 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+
+ 0x00002cb0..0x00002dd3 ( 292 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002de0..0x00003039 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL11ifm2_offset
+ _ZL8num_iter
+
+ 0x00003040..0x000032df ( 672 items) : ../Release/0_0_reloadable5.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64)
+
+ Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_
+
+ Referenced symbols: conv2d_dw_params
+ _ZN12me_primitive11control_rndE
+
+ 0x000032e0..0x000035b1 ( 722 items) : ../Release/0_0_reloadable5.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x000035c0..0x0000379d ( 478 items) : ../Release/0_0_reloadable5.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128)
+
+ Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL8num_iter
+ _ZL10ifmsv_size
+ conv2d_dw_params
+
+ 0x000037a0..0x00003c05 ( 1126 items) : ../Release/0_0_reloadable5.o::_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ conv2d_params
+ add1d_params
+ mul1d_params
+ _ZL14num_depth_iter
+ _ZL11ifm2_offset
+ _ZL8num_iter
+ _ZL10depth_iter
+ _ZL11total_iters
+
+ 0x00003c10..0x00003c33 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c40..0x00003c5f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c60..0x00003c7f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c80..0x00003c9f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003ca0..0x00003cc3 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+
+ 0x00003cd0..0x00003cf7 ( 40 items) : ../Release/0_0_reloadable5.o::_Z13_b924_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+
+ 0x00003d00..0x00003d23 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003d30..0x00003dbd ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0)
+
+External symbols:
+
+ __dso_handle = 0x0
+ _ctors_end = 0x0
+ _ctors_start = 0x0
+ _dtors_end = 0x0
+ _dtors_start = 0x0
+ _pc_end = 0x3dbe
+ _pc_start = 0x930
+ _sp_end_DM_stack = 0x7c400
+ _sp_start_DM_stack = 0x7bac0
+
+Section summary for memory 'DM_stack':
+
+ .stack File
+ ---------- ----------
+ 2368
+ ---------- ----------
+ 2368 Total
+
+Section summary for memory 'DMb':
+
+ .bss .data .rodata File
+ ---------- ---------- ---------- ----------
+ 1056 4 28 ../Release/0_0_reloadable5.o
+ 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ---------- ---------- ----------
+ 1061 4 28 Total
+
+Section summary for memory 'PM':
+
+ .text File
+ ---------- ----------
+ 13008 ../Release/0_0_reloadable5.o
+ 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ----------
+ 13150 Total
+
+File summary:
+
+../Release/0_0_reloadable5.o
+ DMb 1088
+ PM 13008
+
+me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ DMb 5
+
+me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ PM 142
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.sdr
new file mode 100644
index 0000000000000000000000000000000000000000..029eac6b3129d1ccada1bf5bd7decb96296f96f7
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.sdr
@@ -0,0 +1,129 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+// Symbols in memory 'DM_bankA':
+// Symbols in memory 'DM_bankAB':
+// Symbols in memory 'DM_bankAC':
+// Symbols in memory 'DM_bankAD':
+// Symbols in memory 'DM_bankB':
+// Symbols in memory 'DM_bankBC':
+// Symbols in memory 'DM_bankBD':
+// Symbols in memory 'DM_bankC':
+// Symbols in memory 'DM_bankCD':
+// Symbols in memory 'DM_bankD':
+// Symbols in memory 'DM_stack':
+// Symbols in memory 'DM_test':
+// Symbols in memory 'DMb':
+_symbol _ZN12me_primitive11control_satE 0x0007c464
+_symbol _ZN12me_primitive11control_rndE 0x0007c468
+_symbol add1d_attribute_broadcasting_params 0x0007c4c0
+_symbol mul1d_attribute_broadcasting_params 0x0007c500
+_symbol add1d_params 0x0007c540
+_symbol mul1d_params 0x0007c580
+_symbol clip1d_params 0x0007c5c0
+_symbol conv2d_params 0x0007c600
+_symbol conv2d_dw_params 0x0007c7c0
+// Symbols in memory 'DMh':
+// Symbols in memory 'DMh_bankA':
+// Symbols in memory 'DMh_bankAB':
+// Symbols in memory 'DMh_bankAC':
+// Symbols in memory 'DMh_bankAD':
+// Symbols in memory 'DMh_bankB':
+// Symbols in memory 'DMh_bankBC':
+// Symbols in memory 'DMh_bankBD':
+// Symbols in memory 'DMh_bankC':
+// Symbols in memory 'DMh_bankCD':
+// Symbols in memory 'DMh_bankD':
+// Symbols in memory 'DMh_stack':
+// Symbols in memory 'DMs':
+// Symbols in memory 'DMs_bankA':
+// Symbols in memory 'DMs_bankAB':
+// Symbols in memory 'DMs_bankAC':
+// Symbols in memory 'DMs_bankAD':
+// Symbols in memory 'DMs_bankB':
+// Symbols in memory 'DMs_bankBC':
+// Symbols in memory 'DMs_bankBD':
+// Symbols in memory 'DMs_bankC':
+// Symbols in memory 'DMs_bankCD':
+// Symbols in memory 'DMs_bankD':
+// Symbols in memory 'DMs_stack':
+// Symbols in memory 'DMv':
+// Symbols in memory 'DMv_bankA':
+// Symbols in memory 'DMv_bankAB':
+// Symbols in memory 'DMv_bankAC':
+// Symbols in memory 'DMv_bankAD':
+// Symbols in memory 'DMv_bankB':
+// Symbols in memory 'DMv_bankBC':
+// Symbols in memory 'DMv_bankBD':
+// Symbols in memory 'DMv_bankC':
+// Symbols in memory 'DMv_bankCD':
+// Symbols in memory 'DMv_bankD':
+// Symbols in memory 'DMv_stack':
+// Symbols in memory 'DMw':
+// Symbols in memory 'DMw_bankA':
+// Symbols in memory 'DMw_bankAB':
+// Symbols in memory 'DMw_bankAC':
+// Symbols in memory 'DMw_bankAD':
+// Symbols in memory 'DMw_bankB':
+// Symbols in memory 'DMw_bankBC':
+// Symbols in memory 'DMw_bankBD':
+// Symbols in memory 'DMw_bankC':
+// Symbols in memory 'DMw_bankCD':
+// Symbols in memory 'DMw_bankD':
+// Symbols in memory 'DMw_stack':
+// Symbols in memory 'DMx':
+// Symbols in memory 'DMx_bankA':
+// Symbols in memory 'DMx_bankAB':
+// Symbols in memory 'DMx_bankAC':
+// Symbols in memory 'DMx_bankAD':
+// Symbols in memory 'DMx_bankB':
+// Symbols in memory 'DMx_bankBC':
+// Symbols in memory 'DMx_bankBD':
+// Symbols in memory 'DMx_bankC':
+// Symbols in memory 'DMx_bankCD':
+// Symbols in memory 'DMx_bankD':
+// Symbols in memory 'DMx_stack':
+// Symbols in memory 'PM':
+_symbol _Z13kernelWrapperPPvjjjj 0x00000930
+_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0
+_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060
+_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170
+_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70
+_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0
+_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290
+_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870
+_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x00002b00
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00002b80
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00002bf0
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002c00
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002c20
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002cb0
+_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002de0
+_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000032e0
+_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000035c0
+_symbol _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE 0x000037a0
+_symbol _Z13_b896_wrapperPPv 0x00003c10
+_symbol _Z13_b901_wrapperPPv 0x00003c40
+_symbol _Z13_b906_wrapperPPv 0x00003c60
+_symbol _Z13_b881_wrapperPPv 0x00003c80
+_symbol _Z13_b891_wrapperPPv 0x00003ca0
+_symbol _Z13_b924_wrapperPPv 0x00003cd0
+_symbol _Z13_b919_wrapperPPv 0x00003d00
+_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003d30
+// Symbols in memory 'PMw':
+// Symbols in memory 'TM4':
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.srv
new file mode 100644
index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.srv
@@ -0,0 +1,19187 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable5.cc" 94 first
+.src_ref 0 "0_0_reloadable5.cc" 96 60 first
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 94
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "00000000" // /* MW 5 */
+ 6942 "10101100" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "00000000" // /* MW 5 */
+ 7224 "11001100" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "10000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11100000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11100000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11009 "01011000" // /* MW 9 */
+ 11010 "00000000" // /* MW 8 */
+ 11011 "00001000" // /* MW 7 */
+ 11012 "00001011" // /* MW 6 */
+ 11013 "00100000" // /* MW 5 */
+ 11014 "00001000" // /* MW 4 */
+ 11015 "11010000" // /* MW 3 */
+ 11016 "10000101" // /* MW 2 */
+ 11017 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "00000001" // /* MW 3 */
+ 11020 "10000000" // /* MW 2 */
+ 11021 "00010111" // /* MW 1 */
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11025 "00000000" // /* MW 1 */
+ 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11027 "00000000" // /* MW 1 */
+ 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11029 "00000000" // /* MW 1 */
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11033 "00101001" // /* MW 3 */
+ 11034 "00011100" // /* MW 2 */
+ 11035 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11037 "00101110" // /* MW 3 */
+ 11038 "00011100" // /* MW 2 */
+ 11039 "00000001" // /* MW 1 */
+ 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11041 "00000000" // /* MW 1 */
+ 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11043 "00000000" // /* MW 1 */
+ 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11045 "00000000" // /* MW 1 */
+ 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11047 "00000000" // /* MW 1 */
+ 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11051 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11053 "00101001" // /* MW 3 */
+ 11054 "00011100" // /* MW 2 */
+ 11055 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11057 "00101110" // /* MW 3 */
+ 11058 "00000100" // /* MW 2 */
+ 11059 "00000001" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+ 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11063 "00000000" // /* MW 1 */
+ 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11065 "00000000" // /* MW 1 */
+ 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11067 "00000000" // /* MW 1 */
+ 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11069 "00000000" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00101001" // /* MW 3 */
+ 11074 "00011100" // /* MW 2 */
+ 11075 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11077 "01110110" // /* MW 3 */
+ 11078 "00010100" // /* MW 2 */
+ 11079 "00000001" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+ 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11083 "00000000" // /* MW 1 */
+ 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11085 "00000000" // /* MW 1 */
+ 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11087 "00000000" // /* MW 1 */
+ 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11089 "00000000" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "01110001" // /* MW 3 */
+ 11094 "01001100" // /* MW 2 */
+ 11095 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11097 "00010111" // /* MW 3 */
+ 11098 "00000100" // /* MW 2 */
+ 11099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11101 "00000000" // /* MW 3 */
+ 11102 "00101000" // /* MW 2 */
+ 11103 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11105 "00000000" // /* MW 5 */
+ 11106 "10111110" // /* MW 4 */
+ 11107 "11110000" // /* MW 3 */
+ 11108 "00000000" // /* MW 2 */
+ 11109 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11111 "00010100" // /* MW 3 */
+ 11112 "11000010" // /* MW 2 */
+ 11113 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11115 "00100111" // /* MW 3 */
+ 11116 "01110110" // /* MW 2 */
+ 11117 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "10000010" // /* MW 3 */
+ 11120 "00000001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11123 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11137 "00000001" // /* MW 5 */
+ 11138 "00000000" // /* MW 4 */
+ 11139 "00000000" // /* MW 3 */
+ 11140 "00001000" // /* MW 2 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11143 "00111101" // /* MW 3 */
+ 11144 "11111000" // /* MW 2 */
+ 11145 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11147 "00000001" // /* MW 5 */
+ 11148 "00000000" // /* MW 4 */
+ 11149 "10000000" // /* MW 3 */
+ 11150 "00010101" // /* MW 2 */
+ 11151 "00000000" // /* MW 1 */
+.delay_slot
+ 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11153 "10100000" // /* MW 3 */
+ 11154 "00010111" // /* MW 2 */
+ 11155 "00011000" // /* MW 1 */
+.delay_slot
+ 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11157 "00010101" // /* MW 3 */
+ 11158 "11111100" // /* MW 2 */
+ 11159 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.delay_slot
+ 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11161 "11000000" // /* MW 3 */
+ 11162 "11010000" // /* MW 2 */
+ 11163 "00011011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "00001000" // /* MW 9 */
+ 11170 "11000100" // /* MW 8 */
+ 11171 "00110011" // /* MW 7 */
+ 11172 "01101000" // /* MW 6 */
+ 11173 "00000000" // /* MW 5 */
+ 11174 "00000001" // /* MW 4 */
+ 11175 "00100000" // /* MW 3 */
+ 11176 "00000111" // /* MW 2 */
+ 11177 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11179 "01011000" // /* MW 9 */
+ 11180 "11111101" // /* MW 8 */
+ 11181 "00000111" // /* MW 7 */
+ 11182 "00001000" // /* MW 6 */
+ 11183 "10000000" // /* MW 5 */
+ 11184 "00000001" // /* MW 4 */
+ 11185 "10000000" // /* MW 3 */
+ 11186 "11100010" // /* MW 2 */
+ 11187 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11189 "00000001" // /* MW 9 */
+ 11190 "10100000" // /* MW 8 */
+ 11191 "00000111" // /* MW 7 */
+ 11192 "10000000" // /* MW 6 */
+ 11193 "00010001" // /* MW 5 */
+ 11194 "00001010" // /* MW 4 */
+ 11195 "00100000" // /* MW 3 */
+ 11196 "10111110" // /* MW 2 */
+ 11197 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11199 "01001010" // /* MW 3 */
+ 11200 "00000110" // /* MW 2 */
+ 11201 "00000000" // /* MW 1 */
+ 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11205 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11207 "00010111" // /* MW 3 */
+ 11208 "00000010" // /* MW 2 */
+ 11209 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11211 "00000000" // /* MW 3 */
+ 11212 "00101000" // /* MW 2 */
+ 11213 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11215 "00000101" // /* MW 3 */
+ 11216 "00100010" // /* MW 2 */
+ 11217 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "00000000" // /* MW 3 */
+ 11222 "11111000" // /* MW 2 */
+ 11223 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "00100111" // /* MW 3 */
+ 11226 "01110111" // /* MW 2 */
+ 11227 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11229 "10000010" // /* MW 3 */
+ 11230 "00100001" // /* MW 2 */
+ 11231 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11233 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_shared.h" 227 first
+.src_ref 3 "elementwise_binary_shared.h" 232 8 first
+.tail_call
+.function_start
+ 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 11249 "00000000" // /* MW 5 */
+ 11250 "00000000" // /* MW 4 */
+ 11251 "00101000" // /* MW 3 */
+ 11252 "00010011" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11259 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 11263 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11265 "00000001" // /* MW 5 */
+ 11266 "00100001" // /* MW 4 */
+ 11267 "00000000" // /* MW 3 */
+ 11268 "00000000" // /* MW 2 */
+ 11269 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "11000000" // /* MW 3 */
+ 11272 "01010000" // /* MW 2 */
+ 11273 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11275 "10010000" // /* MW 3 */
+ 11276 "01100000" // /* MW 2 */
+ 11277 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "00010001" // /* MW 3 */
+ 11280 "00000100" // /* MW 2 */
+ 11281 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010001" // /* MW 3 */
+ 11284 "00010100" // /* MW 2 */
+ 11285 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11287 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00101110" // /* MW 3 */
+ 11298 "00011100" // /* MW 2 */
+ 11299 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11301 "00000001" // /* MW 5 */
+ 11302 "00000000" // /* MW 4 */
+ 11303 "00000000" // /* MW 3 */
+ 11304 "00001000" // /* MW 2 */
+ 11305 "00000000" // /* MW 1 */
+ 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11307 "00111101" // /* MW 3 */
+ 11308 "11111100" // /* MW 2 */
+ 11309 "00001111" // /* MW 1 */
+ 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11311 "00000000" // /* MW 1 */
+ 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11313 "00000000" // /* MW 1 */
+ 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11315 "00000000" // /* MW 1 */
+ 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11317 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11319 "00101001" // /* MW 3 */
+ 11320 "00011100" // /* MW 2 */
+ 11321 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11323 "00101110" // /* MW 3 */
+ 11324 "00011100" // /* MW 2 */
+ 11325 "00000001" // /* MW 1 */
+ 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11327 "00000000" // /* MW 1 */
+ 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11329 "00000000" // /* MW 1 */
+ 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11331 "00000000" // /* MW 1 */
+ 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11333 "00000000" // /* MW 1 */
+ 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11335 "00000000" // /* MW 1 */
+ 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11337 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11339 "00101001" // /* MW 3 */
+ 11340 "00011100" // /* MW 2 */
+ 11341 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11343 "00101110" // /* MW 3 */
+ 11344 "00000100" // /* MW 2 */
+ 11345 "00000001" // /* MW 1 */
+ 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11347 "00000000" // /* MW 1 */
+ 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11349 "00000000" // /* MW 1 */
+ 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11351 "00000000" // /* MW 1 */
+ 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11353 "00000000" // /* MW 1 */
+ 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11355 "00000000" // /* MW 1 */
+ 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11357 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11359 "00101001" // /* MW 3 */
+ 11360 "00011100" // /* MW 2 */
+ 11361 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11363 "00101110" // /* MW 3 */
+ 11364 "00010100" // /* MW 2 */
+ 11365 "00000001" // /* MW 1 */
+ 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11367 "00000000" // /* MW 1 */
+ 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11369 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */
+ 11371 "00000001" // /* MW 5 */
+ 11372 "00000000" // /* MW 4 */
+ 11373 "00000000" // /* MW 3 */
+ 11374 "00010110" // /* MW 2 */
+ 11375 "00000000" // /* MW 1 */
+.delay_slot
+ 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11377 "10011101" // /* MW 3 */
+ 11378 "11111011" // /* MW 2 */
+ 11379 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11385 "00101001" // /* MW 3 */
+ 11386 "11011100" // /* MW 2 */
+ 11387 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11389 "11000000" // /* MW 3 */
+ 11390 "01100000" // /* MW 2 */
+ 11391 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11393 "00111001" // /* MW 3 */
+ 11394 "11111100" // /* MW 2 */
+ 11395 "00000111" // /* MW 1 */
+ 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11397 "00000000" // /* MW 1 */
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11407 "10011001" // /* MW 3 */
+ 11408 "11111011" // /* MW 2 */
+ 11409 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11411 "00000000" // /* MW 3 */
+ 11412 "00101000" // /* MW 2 */
+ 11413 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11419 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11421 "00000001" // /* MW 3 */
+ 11422 "00100000" // /* MW 2 */
+ 11423 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11425 "01110001" // /* MW 9 */
+ 11426 "00000000" // /* MW 8 */
+ 11427 "00000000" // /* MW 7 */
+ 11428 "00000000" // /* MW 6 */
+ 11429 "11111110" // /* MW 5 */
+ 11430 "00111111" // /* MW 4 */
+ 11431 "00110000" // /* MW 3 */
+ 11432 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11433 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11441 "00010000" // /* MW 9 */
+ 11442 "10110000" // /* MW 8 */
+ 11443 "01111110" // /* MW 7 */
+ 11444 "00001000" // /* MW 6 */
+ 11445 "00000000" // /* MW 5 */
+ 11446 "00000000" // /* MW 4 */
+ 11447 "10000000" // /* MW 3 */
+ 11448 "00000000" // /* MW 2 */
+ 11449 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11451 "00010000" // /* MW 9 */
+ 11452 "10111000" // /* MW 8 */
+ 11453 "10111110" // /* MW 7 */
+ 11454 "00001001" // /* MW 6 */
+ 11455 "00000000" // /* MW 5 */
+ 11456 "00000000" // /* MW 4 */
+ 11457 "11010000" // /* MW 3 */
+ 11458 "00001110" // /* MW 2 */
+ 11459 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11461 "01011000" // /* MW 9 */
+ 11462 "00111100" // /* MW 8 */
+ 11463 "00001011" // /* MW 7 */
+ 11464 "01001000" // /* MW 6 */
+ 11465 "00010111" // /* MW 5 */
+ 11466 "00111110" // /* MW 4 */
+ 11467 "11010000" // /* MW 3 */
+ 11468 "10010000" // /* MW 2 */
+ 11469 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11471 "00010000" // /* MW 9 */
+ 11472 "00110100" // /* MW 8 */
+ 11473 "00110010" // /* MW 7 */
+ 11474 "11110010" // /* MW 6 */
+ 11475 "00000001" // /* MW 5 */
+ 11476 "00000000" // /* MW 4 */
+ 11477 "11010000" // /* MW 3 */
+ 11478 "10000000" // /* MW 2 */
+ 11479 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11481 "01000010" // /* MW 3 */
+ 11482 "00000100" // /* MW 2 */
+ 11483 "00000100" // /* MW 1 */
+ 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11485 "00000000" // /* MW 1 */
+ 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11487 "00000000" // /* MW 1 */
+ 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11489 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11491 "00011101" // /* MW 3 */
+ 11492 "11000010" // /* MW 2 */
+ 11493 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11111001" // /* MW 5 */
+ 11496 "11100001" // /* MW 4 */
+ 11497 "10001010" // /* MW 3 */
+ 11498 "00001110" // /* MW 2 */
+ 11499 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "01101000" // /* MW 5 */
+ 11502 "01010000" // /* MW 4 */
+ 11503 "01110000" // /* MW 3 */
+ 11504 "00010011" // /* MW 2 */
+ 11505 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11507 "10000000" // /* MW 7 */
+ 11508 "10111010" // /* MW 6 */
+ 11509 "11101000" // /* MW 5 */
+ 11510 "01010000" // /* MW 4 */
+ 11511 "01110000" // /* MW 3 */
+ 11512 "00011011" // /* MW 2 */
+ 11513 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11515 "01101000" // /* MW 5 */
+ 11516 "01010000" // /* MW 4 */
+ 11517 "01110000" // /* MW 3 */
+ 11518 "00010011" // /* MW 2 */
+ 11519 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11521 "11101000" // /* MW 5 */
+ 11522 "01010000" // /* MW 4 */
+ 11523 "01110000" // /* MW 3 */
+ 11524 "00011011" // /* MW 2 */
+ 11525 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11527 "10011011" // /* MW 3 */
+ 11528 "00001000" // /* MW 2 */
+ 11529 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11531 "01101000" // /* MW 5 */
+ 11532 "01010000" // /* MW 4 */
+ 11533 "01110000" // /* MW 3 */
+ 11534 "00011011" // /* MW 2 */
+ 11535 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11537 "11101000" // /* MW 5 */
+ 11538 "01010000" // /* MW 4 */
+ 11539 "01110000" // /* MW 3 */
+ 11540 "00010011" // /* MW 2 */
+ 11541 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11543 "01000001" // /* MW 9 */
+ 11544 "11100010" // /* MW 8 */
+ 11545 "00000000" // /* MW 7 */
+ 11546 "00011101" // /* MW 6 */
+ 11547 "00110100" // /* MW 5 */
+ 11548 "00101000" // /* MW 4 */
+ 11549 "01110000" // /* MW 3 */
+ 11550 "00011011" // /* MW 2 */
+ 11551 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11553 "01100001" // /* MW 9 */
+ 11554 "11100000" // /* MW 8 */
+ 11555 "00000001" // /* MW 7 */
+ 11556 "00011101" // /* MW 6 */
+ 11557 "01110100" // /* MW 5 */
+ 11558 "00101000" // /* MW 4 */
+ 11559 "01110000" // /* MW 3 */
+ 11560 "00010011" // /* MW 2 */
+ 11561 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "01000001" // /* MW 9 */
+ 11564 "11100010" // /* MW 8 */
+ 11565 "00000000" // /* MW 7 */
+ 11566 "00011101" // /* MW 6 */
+ 11567 "00110100" // /* MW 5 */
+ 11568 "00101000" // /* MW 4 */
+ 11569 "01110000" // /* MW 3 */
+ 11570 "00011011" // /* MW 2 */
+ 11571 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "01100001" // /* MW 9 */
+ 11574 "11100000" // /* MW 8 */
+ 11575 "00000001" // /* MW 7 */
+ 11576 "00011101" // /* MW 6 */
+ 11577 "01110100" // /* MW 5 */
+ 11578 "00101000" // /* MW 4 */
+ 11579 "01110000" // /* MW 3 */
+ 11580 "00010011" // /* MW 2 */
+ 11581 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "01000001" // /* MW 9 */
+ 11584 "11100010" // /* MW 8 */
+ 11585 "00000000" // /* MW 7 */
+ 11586 "00011101" // /* MW 6 */
+ 11587 "00110100" // /* MW 5 */
+ 11588 "00101000" // /* MW 4 */
+ 11589 "01110000" // /* MW 3 */
+ 11590 "00011011" // /* MW 2 */
+ 11591 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11593 "01100001" // /* MW 9 */
+ 11594 "11100000" // /* MW 8 */
+ 11595 "00000001" // /* MW 7 */
+ 11596 "00011101" // /* MW 6 */
+ 11597 "01110100" // /* MW 5 */
+ 11598 "00101000" // /* MW 4 */
+ 11599 "01110000" // /* MW 3 */
+ 11600 "00010011" // /* MW 2 */
+ 11601 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11603 "01000001" // /* MW 13 */
+ 11604 "11100010" // /* MW 12 */
+ 11605 "00000000" // /* MW 11 */
+ 11606 "10001100" // /* MW 10 */
+ 11607 "01110000" // /* MW 9 */
+ 11608 "00001000" // /* MW 8 */
+ 11609 "00000000" // /* MW 7 */
+ 11610 "00000000" // /* MW 6 */
+ 11611 "01101000" // /* MW 5 */
+ 11612 "01010000" // /* MW 4 */
+ 11613 "01110000" // /* MW 3 */
+ 11614 "00011011" // /* MW 2 */
+ 11615 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11617 "00000011" // /* MW 15 */
+ 11618 "00001111" // /* MW 14 */
+ 11619 "01111000" // /* MW 13 */
+ 11620 "10100101" // /* MW 12 */
+ 11621 "00000001" // /* MW 11 */
+ 11622 "00000000" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "10100011" // /* MW 7 */
+ 11626 "00011100" // /* MW 6 */
+ 11627 "11101010" // /* MW 5 */
+ 11628 "01010000" // /* MW 4 */
+ 11629 "01110000" // /* MW 3 */
+ 11630 "00010011" // /* MW 2 */
+ 11631 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11633 "00010010" // /* MW 15 */
+ 11634 "00000111" // /* MW 14 */
+ 11635 "01111000" // /* MW 13 */
+ 11636 "10100101" // /* MW 12 */
+ 11637 "00000001" // /* MW 11 */
+ 11638 "00000000" // /* MW 10 */
+ 11639 "00000000" // /* MW 9 */
+ 11640 "00000000" // /* MW 8 */
+ 11641 "00100011" // /* MW 7 */
+ 11642 "00011100" // /* MW 6 */
+ 11643 "01101010" // /* MW 5 */
+ 11644 "01010000" // /* MW 4 */
+ 11645 "01110000" // /* MW 3 */
+ 11646 "00011011" // /* MW 2 */
+ 11647 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11649 "01100001" // /* MW 7 */
+ 11650 "11100000" // /* MW 6 */
+ 11651 "00000001" // /* MW 5 */
+ 11652 "00000010" // /* MW 4 */
+ 11653 "01100000" // /* MW 3 */
+ 11654 "10010100" // /* MW 2 */
+ 11655 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11657 "01000001" // /* MW 7 */
+ 11658 "11100010" // /* MW 6 */
+ 11659 "00000000" // /* MW 5 */
+ 11660 "00000010" // /* MW 4 */
+ 11661 "01100000" // /* MW 3 */
+ 11662 "10000100" // /* MW 2 */
+ 11663 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11665 "01100001" // /* MW 7 */
+ 11666 "11100000" // /* MW 6 */
+ 11667 "00000001" // /* MW 5 */
+ 11668 "00000010" // /* MW 4 */
+ 11669 "01100000" // /* MW 3 */
+ 11670 "10010100" // /* MW 2 */
+ 11671 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11673 "01000001" // /* MW 7 */
+ 11674 "11100010" // /* MW 6 */
+ 11675 "00000000" // /* MW 5 */
+ 11676 "00000010" // /* MW 4 */
+ 11677 "01100000" // /* MW 3 */
+ 11678 "10000100" // /* MW 2 */
+ 11679 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11681 "01100001" // /* MW 7 */
+ 11682 "11100000" // /* MW 6 */
+ 11683 "00000001" // /* MW 5 */
+ 11684 "00000010" // /* MW 4 */
+ 11685 "01100000" // /* MW 3 */
+ 11686 "10010100" // /* MW 2 */
+ 11687 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11689 "01000001" // /* MW 7 */
+ 11690 "11100010" // /* MW 6 */
+ 11691 "00000000" // /* MW 5 */
+ 11692 "00000010" // /* MW 4 */
+ 11693 "01100000" // /* MW 3 */
+ 11694 "10000100" // /* MW 2 */
+ 11695 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11697 "01100001" // /* MW 7 */
+ 11698 "11100000" // /* MW 6 */
+ 11699 "00000001" // /* MW 5 */
+ 11700 "00000010" // /* MW 4 */
+ 11701 "01100000" // /* MW 3 */
+ 11702 "10010100" // /* MW 2 */
+ 11703 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11705 "00100011" // /* MW 3 */
+ 11706 "00011100" // /* MW 2 */
+ 11707 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11709 "00000000" // /* MW 5 */
+ 11710 "01010000" // /* MW 4 */
+ 11711 "01100000" // /* MW 3 */
+ 11712 "10010100" // /* MW 2 */
+ 11713 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00100011" // /* MW 3 */
+ 11716 "00011100" // /* MW 2 */
+ 11717 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "10100011" // /* MW 3 */
+ 11720 "00011100" // /* MW 2 */
+ 11721 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "00100011" // /* MW 3 */
+ 11724 "00011100" // /* MW 2 */
+ 11725 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11727 "10100011" // /* MW 3 */
+ 11728 "00011100" // /* MW 2 */
+ 11729 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11731 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11745 "10000000" // /* MW 5 */
+ 11746 "11001000" // /* MW 4 */
+ 11747 "11001000" // /* MW 3 */
+ 11748 "00000111" // /* MW 2 */
+ 11749 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11751 "11000001" // /* MW 5 */
+ 11752 "10110101" // /* MW 4 */
+ 11753 "11011000" // /* MW 3 */
+ 11754 "11000010" // /* MW 2 */
+ 11755 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11757 "00000001" // /* MW 5 */
+ 11758 "00000000" // /* MW 4 */
+ 11759 "00000000" // /* MW 3 */
+ 11760 "00001000" // /* MW 2 */
+ 11761 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11763 "01111001" // /* MW 9 */
+ 11764 "01100000" // /* MW 8 */
+ 11765 "11001010" // /* MW 7 */
+ 11766 "10000001" // /* MW 6 */
+ 11767 "00010100" // /* MW 5 */
+ 11768 "00100011" // /* MW 4 */
+ 11769 "10110000" // /* MW 3 */
+ 11770 "00111010" // /* MW 2 */
+ 11771 "11111111" // /* MW 1 */
+ 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11773 "01110000" // /* MW 7 */
+ 11774 "11010000" // /* MW 6 */
+ 11775 "00001011" // /* MW 5 */
+ 11776 "00000000" // /* MW 4 */
+ 11777 "10110000" // /* MW 3 */
+ 11778 "10000011" // /* MW 2 */
+ 11779 "11111101" // /* MW 1 */
+ 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11781 "00010101" // /* MW 3 */
+ 11782 "11111100" // /* MW 2 */
+ 11783 "00001111" // /* MW 1 */
+ 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11785 "00111101" // /* MW 3 */
+ 11786 "11110000" // /* MW 2 */
+ 11787 "00001111" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */
+ 11791 "00000001" // /* MW 5 */
+ 11792 "01000000" // /* MW 4 */
+ 11793 "01010000" // /* MW 3 */
+ 11794 "00010111" // /* MW 2 */
+ 11795 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "11111011" // /* MW 3 */
+ 11798 "01100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11801 "10100000" // /* MW 5 */
+ 11802 "11001000" // /* MW 4 */
+ 11803 "11000100" // /* MW 3 */
+ 11804 "00000111" // /* MW 2 */
+ 11805 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11807 "01110000" // /* MW 7 */
+ 11808 "01100000" // /* MW 6 */
+ 11809 "00110111" // /* MW 5 */
+ 11810 "00000001" // /* MW 4 */
+ 11811 "00110000" // /* MW 3 */
+ 11812 "11000110" // /* MW 2 */
+ 11813 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11815 "11000000" // /* MW 3 */
+ 11816 "11010110" // /* MW 2 */
+ 11817 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11819 "00010001" // /* MW 9 */
+ 11820 "11000000" // /* MW 8 */
+ 11821 "10110010" // /* MW 7 */
+ 11822 "11110011" // /* MW 6 */
+ 11823 "00000001" // /* MW 5 */
+ 11824 "00000000" // /* MW 4 */
+ 11825 "10110000" // /* MW 3 */
+ 11826 "10100011" // /* MW 2 */
+ 11827 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11829 "00010001" // /* MW 9 */
+ 11830 "00110100" // /* MW 8 */
+ 11831 "00110010" // /* MW 7 */
+ 11832 "11110001" // /* MW 6 */
+ 11833 "00000001" // /* MW 5 */
+ 11834 "00000000" // /* MW 4 */
+ 11835 "01100000" // /* MW 3 */
+ 11836 "10010001" // /* MW 2 */
+ 11837 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11839 "00010000" // /* MW 9 */
+ 11840 "00110010" // /* MW 8 */
+ 11841 "00110010" // /* MW 7 */
+ 11842 "11110001" // /* MW 6 */
+ 11843 "00000001" // /* MW 5 */
+ 11844 "00000000" // /* MW 4 */
+ 11845 "11100000" // /* MW 3 */
+ 11846 "11000000" // /* MW 2 */
+ 11847 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11849 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 11851 "00000001" // /* MW 5 */
+ 11852 "00000000" // /* MW 4 */
+ 11853 "00010000" // /* MW 3 */
+ 11854 "00010110" // /* MW 2 */
+ 11855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "00110001" // /* MW 3 */
+ 11862 "00100000" // /* MW 2 */
+ 11863 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11865 "00000101" // /* MW 3 */
+ 11866 "00100000" // /* MW 2 */
+ 11867 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11869 "00010001" // /* MW 3 */
+ 11870 "00000110" // /* MW 2 */
+ 11871 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11873 "00010000" // /* MW 9 */
+ 11874 "00101000" // /* MW 8 */
+ 11875 "10110010" // /* MW 7 */
+ 11876 "11110000" // /* MW 6 */
+ 11877 "00000001" // /* MW 5 */
+ 11878 "00000000" // /* MW 4 */
+ 11879 "11010000" // /* MW 3 */
+ 11880 "11000010" // /* MW 2 */
+ 11881 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11883 "00010000" // /* MW 9 */
+ 11884 "00101010" // /* MW 8 */
+ 11885 "10110010" // /* MW 7 */
+ 11886 "11110001" // /* MW 6 */
+ 11887 "00000001" // /* MW 5 */
+ 11888 "00000000" // /* MW 4 */
+ 11889 "11010000" // /* MW 3 */
+ 11890 "11000110" // /* MW 2 */
+ 11891 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11893 "00010000" // /* MW 9 */
+ 11894 "00101110" // /* MW 8 */
+ 11895 "10110010" // /* MW 7 */
+ 11896 "11110000" // /* MW 6 */
+ 11897 "00000001" // /* MW 5 */
+ 11898 "00000000" // /* MW 4 */
+ 11899 "01010000" // /* MW 3 */
+ 11900 "11001011" // /* MW 2 */
+ 11901 "11101010" // /* MW 1 */
+ 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11903 "00000000" // /* MW 1 */
+ 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11905 "00000000" // /* MW 1 */
+ 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11907 "00000000" // /* MW 1 */
+ 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000000" // /* MW 4 */
+ 11911 "01011000" // /* MW 3 */
+ 11912 "00010111" // /* MW 2 */
+ 11913 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11000100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "00001111" // /* MW 3 */
+ 11922 "01100001" // /* MW 2 */
+ 11923 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "01010001" // /* MW 3 */
+ 11926 "00000110" // /* MW 2 */
+ 11927 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "00010001" // /* MW 3 */
+ 11930 "00000110" // /* MW 2 */
+ 11931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00010001" // /* MW 3 */
+ 11934 "00000110" // /* MW 2 */
+ 11935 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11937 "10101000" // /* MW 5 */
+ 11938 "11001000" // /* MW 4 */
+ 11939 "11000110" // /* MW 3 */
+ 11940 "00000111" // /* MW 2 */
+ 11941 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11943 "00010000" // /* MW 9 */
+ 11944 "00101110" // /* MW 8 */
+ 11945 "10110010" // /* MW 7 */
+ 11946 "11110000" // /* MW 6 */
+ 11947 "00000001" // /* MW 5 */
+ 11948 "00000000" // /* MW 4 */
+ 11949 "11110000" // /* MW 3 */
+ 11950 "00101100" // /* MW 2 */
+ 11951 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "10000110" // /* MW 3 */
+ 11954 "01100111" // /* MW 2 */
+ 11955 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11957 "00010000" // /* MW 9 */
+ 11958 "00100000" // /* MW 8 */
+ 11959 "00110010" // /* MW 7 */
+ 11960 "11110001" // /* MW 6 */
+ 11961 "00000001" // /* MW 5 */
+ 11962 "00000000" // /* MW 4 */
+ 11963 "11010000" // /* MW 3 */
+ 11964 "11101110" // /* MW 2 */
+ 11965 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11967 "00010110" // /* MW 3 */
+ 11968 "11111110" // /* MW 2 */
+ 11969 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11971 "00110110" // /* MW 3 */
+ 11972 "11111110" // /* MW 2 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "01010110" // /* MW 3 */
+ 11976 "00000110" // /* MW 2 */
+ 11977 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11979 "01110110" // /* MW 3 */
+ 11980 "01000110" // /* MW 2 */
+ 11981 "00000000" // /* MW 1 */
+ 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11983 "00000000" // /* MW 1 */
+ 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11985 "00000000" // /* MW 1 */
+ 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11987 "00000000" // /* MW 1 */
+ 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11989 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11991 "00000010" // /* MW 3 */
+ 11992 "01100001" // /* MW 2 */
+ 11993 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11995 "00001110" // /* MW 5 */
+ 11996 "01000000" // /* MW 4 */
+ 11997 "00111001" // /* MW 3 */
+ 11998 "11000010" // /* MW 2 */
+ 11999 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12001 "00010001" // /* MW 3 */
+ 12002 "00000110" // /* MW 2 */
+ 12003 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "11111101" // /* MW 3 */
+ 12006 "11100000" // /* MW 2 */
+ 12007 "00010111" // /* MW 1 */
+ 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12009 "00000000" // /* MW 1 */
+ 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12011 "00000000" // /* MW 1 */
+ 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12013 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12015 "00001000" // /* MW 3 */
+ 12016 "11010011" // /* MW 2 */
+ 12017 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12019 "00000110" // /* MW 3 */
+ 12020 "01100111" // /* MW 2 */
+ 12021 "00011010" // /* MW 1 */
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12027 "01110110" // /* MW 3 */
+ 12028 "11111111" // /* MW 2 */
+ 12029 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12031 "00110110" // /* MW 3 */
+ 12032 "11111110" // /* MW 2 */
+ 12033 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12035 "01010110" // /* MW 3 */
+ 12036 "11111110" // /* MW 2 */
+ 12037 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "01110110" // /* MW 3 */
+ 12040 "01010110" // /* MW 2 */
+ 12041 "00000010" // /* MW 1 */
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+ 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12049 "00000000" // /* MW 1 */
+ 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "00010010" // /* MW 3 */
+ 12054 "10100011" // /* MW 2 */
+ 12055 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "00110001" // /* MW 3 */
+ 12058 "00000110" // /* MW 2 */
+ 12059 "00001010" // /* MW 1 */
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+ 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12063 "00000000" // /* MW 1 */
+ 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12065 "00000000" // /* MW 1 */
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12069 "00001000" // /* MW 3 */
+ 12070 "11010011" // /* MW 2 */
+ 12071 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12073 "01111001" // /* MW 9 */
+ 12074 "01100000" // /* MW 8 */
+ 12075 "11001110" // /* MW 7 */
+ 12076 "00101001" // /* MW 6 */
+ 12077 "00000000" // /* MW 5 */
+ 12078 "00000001" // /* MW 4 */
+ 12079 "01100000" // /* MW 3 */
+ 12080 "00010001" // /* MW 2 */
+ 12081 "11010001" // /* MW 1 */
+ 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12083 "00000000" // /* MW 1 */
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12087 "00011001" // /* MW 3 */
+ 12088 "11101110" // /* MW 2 */
+ 12089 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12091 "00111011" // /* MW 5 */
+ 12092 "11011000" // /* MW 4 */
+ 12093 "11011111" // /* MW 3 */
+ 12094 "11000110" // /* MW 2 */
+ 12095 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12097 "10000001" // /* MW 5 */
+ 12098 "11011101" // /* MW 4 */
+ 12099 "11010110" // /* MW 3 */
+ 12100 "11010010" // /* MW 2 */
+ 12101 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12103 "01010110" // /* MW 3 */
+ 12104 "01001110" // /* MW 2 */
+ 12105 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00011110" // /* MW 3 */
+ 12108 "01011101" // /* MW 2 */
+ 12109 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12111 "11000000" // /* MW 3 */
+ 12112 "01100000" // /* MW 2 */
+ 12113 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12115 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12117 "01110110" // /* MW 3 */
+ 12118 "00000110" // /* MW 2 */
+ 12119 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 12123 "00000001" // /* MW 5 */
+ 12124 "00000000" // /* MW 4 */
+ 12125 "01011000" // /* MW 3 */
+ 12126 "00010110" // /* MW 2 */
+ 12127 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12129 "11000000" // /* MW 3 */
+ 12130 "11010100" // /* MW 2 */
+ 12131 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00001101" // /* MW 3 */
+ 12134 "01100011" // /* MW 2 */
+ 12135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "00001101" // /* MW 3 */
+ 12138 "00100001" // /* MW 2 */
+ 12139 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12141 "01000001" // /* MW 3 */
+ 12142 "01101001" // /* MW 2 */
+ 12143 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12145 "00000000" // /* MW 15 */
+ 12146 "00000000" // /* MW 14 */
+ 12147 "10101000" // /* MW 13 */
+ 12148 "11100010" // /* MW 12 */
+ 12149 "00110100" // /* MW 11 */
+ 12150 "00000000" // /* MW 10 */
+ 12151 "00000000" // /* MW 9 */
+ 12152 "00000000" // /* MW 8 */
+ 12153 "01011011" // /* MW 7 */
+ 12154 "00000001" // /* MW 6 */
+ 12155 "00100000" // /* MW 5 */
+ 12156 "00000000" // /* MW 4 */
+ 12157 "11110000" // /* MW 3 */
+ 12158 "00101100" // /* MW 2 */
+ 12159 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12161 "01111000" // /* MW 9 */
+ 12162 "11010000" // /* MW 8 */
+ 12163 "10110011" // /* MW 7 */
+ 12164 "00101000" // /* MW 6 */
+ 12165 "00000000" // /* MW 5 */
+ 12166 "00000001" // /* MW 4 */
+ 12167 "11010000" // /* MW 3 */
+ 12168 "11000110" // /* MW 2 */
+ 12169 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12171 "11000000" // /* MW 5 */
+ 12172 "11001000" // /* MW 4 */
+ 12173 "11001100" // /* MW 3 */
+ 12174 "00000111" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+ 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12177 "00000000" // /* MW 1 */
+ 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12179 "00000000" // /* MW 1 */
+ 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12181 "00000000" // /* MW 1 */
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12187 "00001000" // /* MW 3 */
+ 12188 "01010001" // /* MW 2 */
+ 12189 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12191 "00110110" // /* MW 3 */
+ 12192 "11110110" // /* MW 2 */
+ 12193 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12195 "00011001" // /* MW 3 */
+ 12196 "11101101" // /* MW 2 */
+ 12197 "00000111" // /* MW 1 */
+ 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12199 "00000000" // /* MW 1 */
+ 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12201 "00000000" // /* MW 1 */
+ 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12203 "00000000" // /* MW 1 */
+ 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12205 "00000000" // /* MW 1 */
+ 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12207 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12209 "00010001" // /* MW 3 */
+ 12210 "00100011" // /* MW 2 */
+ 12211 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12213 "01100011" // /* MW 5 */
+ 12214 "11101100" // /* MW 4 */
+ 12215 "11010011" // /* MW 3 */
+ 12216 "11000110" // /* MW 2 */
+ 12217 "01001010" // /* MW 1 */
+ 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12219 "00000000" // /* MW 1 */
+ 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12221 "00000000" // /* MW 1 */
+ 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12223 "00000000" // /* MW 1 */
+ 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12225 "00000000" // /* MW 1 */
+ 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12227 "00000000" // /* MW 1 */
+ 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12229 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12231 "00001000" // /* MW 3 */
+ 12232 "01010001" // /* MW 2 */
+ 12233 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12235 "00010000" // /* MW 9 */
+ 12236 "00100000" // /* MW 8 */
+ 12237 "10110010" // /* MW 7 */
+ 12238 "11110000" // /* MW 6 */
+ 12239 "00000001" // /* MW 5 */
+ 12240 "00000000" // /* MW 4 */
+ 12241 "11010000" // /* MW 3 */
+ 12242 "11001110" // /* MW 2 */
+ 12243 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "01010110" // /* MW 3 */
+ 12246 "00000110" // /* MW 2 */
+ 12247 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12249 "00110110" // /* MW 3 */
+ 12250 "00000110" // /* MW 2 */
+ 12251 "00000001" // /* MW 1 */
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+ 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12257 "00000000" // /* MW 1 */
+ 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12259 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12261 "00110001" // /* MW 3 */
+ 12262 "00100001" // /* MW 2 */
+ 12263 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12265 "00010001" // /* MW 3 */
+ 12266 "11100110" // /* MW 2 */
+ 12267 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12269 "00101000" // /* MW 3 */
+ 12270 "01100001" // /* MW 2 */
+ 12271 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12273 "00000001" // /* MW 5 */
+ 12274 "01000000" // /* MW 4 */
+ 12275 "00001000" // /* MW 3 */
+ 12276 "00011000" // /* MW 2 */
+ 12277 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12285 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12289 "00000001" // /* MW 3 */
+ 12290 "00100000" // /* MW 2 */
+ 12291 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "11000001" // /* MW 11 */
+ 12294 "00001000" // /* MW 10 */
+ 12295 "10000011" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12305 "00111001" // /* MW 3 */
+ 12306 "11110000" // /* MW 2 */
+ 12307 "00000111" // /* MW 1 */
+ 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12309 "11110001" // /* MW 3 */
+ 12310 "11111101" // /* MW 2 */
+ 12311 "00000111" // /* MW 1 */
+ 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12313 "10011001" // /* MW 3 */
+ 12314 "11110111" // /* MW 2 */
+ 12315 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12317 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12319 "11010001" // /* MW 3 */
+ 12320 "11111001" // /* MW 2 */
+ 12321 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12327 "00000000" // /* MW 3 */
+ 12328 "00101000" // /* MW 2 */
+ 12329 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12331 "00001011" // /* MW 3 */
+ 12332 "10001110" // /* MW 2 */
+ 12333 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12335 "00000001" // /* MW 5 */
+ 12336 "00000000" // /* MW 4 */
+ 12337 "00000000" // /* MW 3 */
+ 12338 "11111000" // /* MW 2 */
+ 12339 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12343 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12345 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12353 "00010000" // /* MW 9 */
+ 12354 "11100000" // /* MW 8 */
+ 12355 "10110011" // /* MW 7 */
+ 12356 "11110000" // /* MW 6 */
+ 12357 "00000001" // /* MW 5 */
+ 12358 "00000000" // /* MW 4 */
+ 12359 "11010000" // /* MW 3 */
+ 12360 "10000101" // /* MW 2 */
+ 12361 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12363 "01011000" // /* MW 9 */
+ 12364 "00000000" // /* MW 8 */
+ 12365 "00001000" // /* MW 7 */
+ 12366 "01001011" // /* MW 6 */
+ 12367 "00000000" // /* MW 5 */
+ 12368 "00000001" // /* MW 4 */
+ 12369 "11010000" // /* MW 3 */
+ 12370 "10000001" // /* MW 2 */
+ 12371 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12373 "00000001" // /* MW 5 */
+ 12374 "00000000" // /* MW 4 */
+ 12375 "00000000" // /* MW 3 */
+ 12376 "00001000" // /* MW 2 */
+ 12377 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12379 "00010001" // /* MW 9 */
+ 12380 "11100000" // /* MW 8 */
+ 12381 "10110011" // /* MW 7 */
+ 12382 "11110011" // /* MW 6 */
+ 12383 "00000001" // /* MW 5 */
+ 12384 "00000000" // /* MW 4 */
+ 12385 "10110000" // /* MW 3 */
+ 12386 "11110011" // /* MW 2 */
+ 12387 "11111110" // /* MW 1 */
+ 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12389 "00111101" // /* MW 3 */
+ 12390 "11111100" // /* MW 2 */
+ 12391 "00001111" // /* MW 1 */
+ 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12393 "11110101" // /* MW 3 */
+ 12394 "11111001" // /* MW 2 */
+ 12395 "00001111" // /* MW 1 */
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12399 "00101001" // /* MW 3 */
+ 12400 "00011100" // /* MW 2 */
+ 12401 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12403 "00001001" // /* MW 3 */
+ 12404 "00011100" // /* MW 2 */
+ 12405 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12407 "00101110" // /* MW 3 */
+ 12408 "00000100" // /* MW 2 */
+ 12409 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12411 "00001110" // /* MW 3 */
+ 12412 "00010100" // /* MW 2 */
+ 12413 "00000000" // /* MW 1 */
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12415 "00000000" // /* MW 1 */
+ 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12417 "00000000" // /* MW 1 */
+ 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12419 "00000000" // /* MW 1 */
+ 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12421 "00000000" // /* MW 1 */
+ 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12423 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00101001" // /* MW 3 */
+ 12426 "00000100" // /* MW 2 */
+ 12427 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00001001" // /* MW 3 */
+ 12430 "00010100" // /* MW 2 */
+ 12431 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12433 "00101010" // /* MW 3 */
+ 12434 "01011110" // /* MW 2 */
+ 12435 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12437 "01001010" // /* MW 3 */
+ 12438 "11101110" // /* MW 2 */
+ 12439 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12441 "00101010" // /* MW 3 */
+ 12442 "11101100" // /* MW 2 */
+ 12443 "00000111" // /* MW 1 */
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+ 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12449 "00000000" // /* MW 1 */
+ 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12451 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12453 "00000001" // /* MW 5 */
+ 12454 "00000000" // /* MW 4 */
+ 12455 "10011000" // /* MW 3 */
+ 12456 "00011110" // /* MW 2 */
+ 12457 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12459 "01000011" // /* MW 5 */
+ 12460 "10111110" // /* MW 4 */
+ 12461 "10111000" // /* MW 3 */
+ 12462 "11001010" // /* MW 2 */
+ 12463 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12465 "00010001" // /* MW 5 */
+ 12466 "11000010" // /* MW 4 */
+ 12467 "10110000" // /* MW 3 */
+ 12468 "10000110" // /* MW 2 */
+ 12469 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12471 "00010101" // /* MW 5 */
+ 12472 "11101111" // /* MW 4 */
+ 12473 "10110111" // /* MW 3 */
+ 12474 "01000010" // /* MW 2 */
+ 12475 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12477 "11110001" // /* MW 3 */
+ 12478 "00100010" // /* MW 2 */
+ 12479 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12481 "00000000" // /* MW 15 */
+ 12482 "00000000" // /* MW 14 */
+ 12483 "01111000" // /* MW 13 */
+ 12484 "10100101" // /* MW 12 */
+ 12485 "00000001" // /* MW 11 */
+ 12486 "10010000" // /* MW 10 */
+ 12487 "00001000" // /* MW 9 */
+ 12488 "00011110" // /* MW 8 */
+ 12489 "01011011" // /* MW 7 */
+ 12490 "00000001" // /* MW 6 */
+ 12491 "00100000" // /* MW 5 */
+ 12492 "00000000" // /* MW 4 */
+ 12493 "11110000" // /* MW 3 */
+ 12494 "00101100" // /* MW 2 */
+ 12495 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12497 "00000010" // /* MW 5 */
+ 12498 "01000000" // /* MW 4 */
+ 12499 "00100000" // /* MW 3 */
+ 12500 "11010010" // /* MW 2 */
+ 12501 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12503 "01000011" // /* MW 5 */
+ 12504 "01001000" // /* MW 4 */
+ 12505 "01011000" // /* MW 3 */
+ 12506 "11000101" // /* MW 2 */
+ 12507 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12509 "01101010" // /* MW 3 */
+ 12510 "11101110" // /* MW 2 */
+ 12511 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12513 "00110001" // /* MW 3 */
+ 12514 "11101100" // /* MW 2 */
+ 12515 "00000111" // /* MW 1 */
+ 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12517 "00000000" // /* MW 1 */
+ 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12519 "00000000" // /* MW 1 */
+ 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12521 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12523 "01000110" // /* MW 3 */
+ 12524 "11101001" // /* MW 2 */
+ 12525 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12527 "00001010" // /* MW 3 */
+ 12528 "00110111" // /* MW 2 */
+ 12529 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12531 "01100011" // /* MW 5 */
+ 12532 "11000110" // /* MW 4 */
+ 12533 "10111000" // /* MW 3 */
+ 12534 "01001110" // /* MW 2 */
+ 12535 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12537 "01000001" // /* MW 9 */
+ 12538 "00000000" // /* MW 8 */
+ 12539 "00000000" // /* MW 7 */
+ 12540 "10100110" // /* MW 6 */
+ 12541 "00000111" // /* MW 5 */
+ 12542 "00000000" // /* MW 4 */
+ 12543 "10110000" // /* MW 3 */
+ 12544 "01000110" // /* MW 2 */
+ 12545 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12547 "00100010" // /* MW 3 */
+ 12548 "10101001" // /* MW 2 */
+ 12549 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12551 "00001010" // /* MW 3 */
+ 12552 "01110111" // /* MW 2 */
+ 12553 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12555 "00010001" // /* MW 3 */
+ 12556 "00100101" // /* MW 2 */
+ 12557 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12559 "01110000" // /* MW 3 */
+ 12560 "00100110" // /* MW 2 */
+ 12561 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12563 "01100000" // /* MW 13 */
+ 12564 "00101011" // /* MW 12 */
+ 12565 "00000000" // /* MW 11 */
+ 12566 "00001001" // /* MW 10 */
+ 12567 "10011000" // /* MW 9 */
+ 12568 "00111101" // /* MW 8 */
+ 12569 "00100010" // /* MW 7 */
+ 12570 "01000001" // /* MW 6 */
+ 12571 "00100100" // /* MW 5 */
+ 12572 "00000000" // /* MW 4 */
+ 12573 "11110000" // /* MW 3 */
+ 12574 "00101100" // /* MW 2 */
+ 12575 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12577 "01011000" // /* MW 9 */
+ 12578 "01000010" // /* MW 8 */
+ 12579 "00000000" // /* MW 7 */
+ 12580 "11001000" // /* MW 6 */
+ 12581 "00110111" // /* MW 5 */
+ 12582 "00111111" // /* MW 4 */
+ 12583 "00100000" // /* MW 3 */
+ 12584 "00001110" // /* MW 2 */
+ 12585 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12587 "01011000" // /* MW 9 */
+ 12588 "11111100" // /* MW 8 */
+ 12589 "00101001" // /* MW 7 */
+ 12590 "00001000" // /* MW 6 */
+ 12591 "10000000" // /* MW 5 */
+ 12592 "00000001" // /* MW 4 */
+ 12593 "00100000" // /* MW 3 */
+ 12594 "11000010" // /* MW 2 */
+ 12595 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12597 "01011000" // /* MW 9 */
+ 12598 "00000010" // /* MW 8 */
+ 12599 "10001000" // /* MW 7 */
+ 12600 "10001000" // /* MW 6 */
+ 12601 "01100000" // /* MW 5 */
+ 12602 "00000000" // /* MW 4 */
+ 12603 "00100000" // /* MW 3 */
+ 12604 "11011010" // /* MW 2 */
+ 12605 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12607 "01011000" // /* MW 9 */
+ 12608 "00010111" // /* MW 8 */
+ 12609 "10001000" // /* MW 7 */
+ 12610 "00001011" // /* MW 6 */
+ 12611 "01010001" // /* MW 5 */
+ 12612 "00000000" // /* MW 4 */
+ 12613 "01010000" // /* MW 3 */
+ 12614 "01000101" // /* MW 2 */
+ 12615 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12617 "01011000" // /* MW 9 */
+ 12618 "00100000" // /* MW 8 */
+ 12619 "10000000" // /* MW 7 */
+ 12620 "01001000" // /* MW 6 */
+ 12621 "00100111" // /* MW 5 */
+ 12622 "00111111" // /* MW 4 */
+ 12623 "00100000" // /* MW 3 */
+ 12624 "01010110" // /* MW 2 */
+ 12625 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12627 "01011000" // /* MW 9 */
+ 12628 "00000001" // /* MW 8 */
+ 12629 "01001000" // /* MW 7 */
+ 12630 "11001011" // /* MW 6 */
+ 12631 "01110000" // /* MW 5 */
+ 12632 "00000001" // /* MW 4 */
+ 12633 "00100000" // /* MW 3 */
+ 12634 "01111010" // /* MW 2 */
+ 12635 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12637 "01011000" // /* MW 9 */
+ 12638 "11000000" // /* MW 8 */
+ 12639 "11101111" // /* MW 7 */
+ 12640 "00001011" // /* MW 6 */
+ 12641 "11010000" // /* MW 5 */
+ 12642 "00000101" // /* MW 4 */
+ 12643 "10000000" // /* MW 3 */
+ 12644 "11000000" // /* MW 2 */
+ 12645 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12647 "00100001" // /* MW 3 */
+ 12648 "00101000" // /* MW 2 */
+ 12649 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12651 "00000110" // /* MW 3 */
+ 12652 "11000111" // /* MW 2 */
+ 12653 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12655 "00000010" // /* MW 5 */
+ 12656 "00110110" // /* MW 4 */
+ 12657 "01010000" // /* MW 3 */
+ 12658 "11110001" // /* MW 2 */
+ 12659 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12661 "11110101" // /* MW 5 */
+ 12662 "00111111" // /* MW 4 */
+ 12663 "01001011" // /* MW 3 */
+ 12664 "00101000" // /* MW 2 */
+ 12665 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12667 "00011101" // /* MW 5 */
+ 12668 "00100000" // /* MW 4 */
+ 12669 "11110001" // /* MW 3 */
+ 12670 "11100001" // /* MW 2 */
+ 12671 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12673 "01110000" // /* MW 3 */
+ 12674 "00101000" // /* MW 2 */
+ 12675 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12677 "00000001" // /* MW 5 */
+ 12678 "10100000" // /* MW 4 */
+ 12679 "10010000" // /* MW 3 */
+ 12680 "00000000" // /* MW 2 */
+ 12681 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000001" // /* MW 5 */
+ 12684 "10110100" // /* MW 4 */
+ 12685 "10111101" // /* MW 3 */
+ 12686 "11100111" // /* MW 2 */
+ 12687 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12689 "00000010" // /* MW 5 */
+ 12690 "10100011" // /* MW 4 */
+ 12691 "10110000" // /* MW 3 */
+ 12692 "00001101" // /* MW 2 */
+ 12693 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12695 "11111111" // /* MW 5 */
+ 12696 "00110101" // /* MW 4 */
+ 12697 "10110000" // /* MW 3 */
+ 12698 "11001101" // /* MW 2 */
+ 12699 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12701 "00001111" // /* MW 3 */
+ 12702 "11001101" // /* MW 2 */
+ 12703 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12705 "00011111" // /* MW 3 */
+ 12706 "11011111" // /* MW 2 */
+ 12707 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12709 "11111111" // /* MW 5 */
+ 12710 "10110011" // /* MW 4 */
+ 12711 "11111001" // /* MW 3 */
+ 12712 "01101011" // /* MW 2 */
+ 12713 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12715 "00000111" // /* MW 3 */
+ 12716 "00110111" // /* MW 2 */
+ 12717 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12719 "11011111" // /* MW 5 */
+ 12720 "10010000" // /* MW 4 */
+ 12721 "00110111" // /* MW 3 */
+ 12722 "11010110" // /* MW 2 */
+ 12723 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12725 "01010010" // /* MW 3 */
+ 12726 "00111000" // /* MW 2 */
+ 12727 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12729 "00101101" // /* MW 3 */
+ 12730 "00100101" // /* MW 2 */
+ 12731 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00111111" // /* MW 5 */
+ 12734 "11001000" // /* MW 4 */
+ 12735 "00111000" // /* MW 3 */
+ 12736 "01001010" // /* MW 2 */
+ 12737 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12739 "11111011" // /* MW 5 */
+ 12740 "01110010" // /* MW 4 */
+ 12741 "00111111" // /* MW 3 */
+ 12742 "11110010" // /* MW 2 */
+ 12743 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12745 "00011111" // /* MW 5 */
+ 12746 "01110000" // /* MW 4 */
+ 12747 "00111001" // /* MW 3 */
+ 12748 "11110010" // /* MW 2 */
+ 12749 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12751 "11111011" // /* MW 5 */
+ 12752 "11001110" // /* MW 4 */
+ 12753 "00111001" // /* MW 3 */
+ 12754 "11001110" // /* MW 2 */
+ 12755 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12757 "11101010" // /* MW 5 */
+ 12758 "10110011" // /* MW 4 */
+ 12759 "10111001" // /* MW 3 */
+ 12760 "00110101" // /* MW 2 */
+ 12761 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12763 "01011011" // /* MW 5 */
+ 12764 "01111011" // /* MW 4 */
+ 12765 "00111001" // /* MW 3 */
+ 12766 "11111110" // /* MW 2 */
+ 12767 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12769 "11100010" // /* MW 5 */
+ 12770 "00110011" // /* MW 4 */
+ 12771 "11111001" // /* MW 3 */
+ 12772 "00100001" // /* MW 2 */
+ 12773 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12775 "00000100" // /* MW 5 */
+ 12776 "11110011" // /* MW 4 */
+ 12777 "00111111" // /* MW 3 */
+ 12778 "10000010" // /* MW 2 */
+ 12779 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12781 "01101101" // /* MW 3 */
+ 12782 "11111111" // /* MW 2 */
+ 12783 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12785 "11111111" // /* MW 5 */
+ 12786 "10111111" // /* MW 4 */
+ 12787 "00111001" // /* MW 3 */
+ 12788 "01100110" // /* MW 2 */
+ 12789 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12791 "11011011" // /* MW 5 */
+ 12792 "11000110" // /* MW 4 */
+ 12793 "00111000" // /* MW 3 */
+ 12794 "10000110" // /* MW 2 */
+ 12795 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12797 "11111111" // /* MW 5 */
+ 12798 "00110001" // /* MW 4 */
+ 12799 "00111001" // /* MW 3 */
+ 12800 "10100100" // /* MW 2 */
+ 12801 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12803 "11000011" // /* MW 5 */
+ 12804 "11011011" // /* MW 4 */
+ 12805 "00110011" // /* MW 3 */
+ 12806 "11011010" // /* MW 2 */
+ 12807 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12809 "01011011" // /* MW 5 */
+ 12810 "01000011" // /* MW 4 */
+ 12811 "00111000" // /* MW 3 */
+ 12812 "11001010" // /* MW 2 */
+ 12813 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12815 "01011011" // /* MW 5 */
+ 12816 "11111100" // /* MW 4 */
+ 12817 "00111001" // /* MW 3 */
+ 12818 "10011110" // /* MW 2 */
+ 12819 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12821 "11000001" // /* MW 5 */
+ 12822 "11011010" // /* MW 4 */
+ 12823 "00111110" // /* MW 3 */
+ 12824 "11001110" // /* MW 2 */
+ 12825 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12827 "11110010" // /* MW 5 */
+ 12828 "10111111" // /* MW 4 */
+ 12829 "00011110" // /* MW 3 */
+ 12830 "00100000" // /* MW 2 */
+ 12831 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12833 "10100011" // /* MW 5 */
+ 12834 "01000011" // /* MW 4 */
+ 12835 "00111000" // /* MW 3 */
+ 12836 "11011010" // /* MW 2 */
+ 12837 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12839 "01011001" // /* MW 9 */
+ 12840 "11111111" // /* MW 8 */
+ 12841 "00001111" // /* MW 7 */
+ 12842 "01101110" // /* MW 6 */
+ 12843 "01101101" // /* MW 5 */
+ 12844 "00011111" // /* MW 4 */
+ 12845 "00110000" // /* MW 3 */
+ 12846 "11000010" // /* MW 2 */
+ 12847 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12849 "10000001" // /* MW 5 */
+ 12850 "01101010" // /* MW 4 */
+ 12851 "00111110" // /* MW 3 */
+ 12852 "11001010" // /* MW 2 */
+ 12853 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12855 "11000011" // /* MW 5 */
+ 12856 "01010010" // /* MW 4 */
+ 12857 "00111010" // /* MW 3 */
+ 12858 "11101010" // /* MW 2 */
+ 12859 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12861 "00001000" // /* MW 11 */
+ 12862 "00010000" // /* MW 10 */
+ 12863 "01101101" // /* MW 9 */
+ 12864 "10110010" // /* MW 8 */
+ 12865 "00001000" // /* MW 7 */
+ 12866 "10101011" // /* MW 6 */
+ 12867 "01110001" // /* MW 5 */
+ 12868 "00011110" // /* MW 4 */
+ 12869 "00000111" // /* MW 3 */
+ 12870 "00010001" // /* MW 2 */
+ 12871 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "01110001" // /* MW 3 */
+ 12874 "00011110" // /* MW 2 */
+ 12875 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12877 "11111011" // /* MW 5 */
+ 12878 "01010010" // /* MW 4 */
+ 12879 "00111000" // /* MW 3 */
+ 12880 "11000110" // /* MW 2 */
+ 12881 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12883 "10000011" // /* MW 5 */
+ 12884 "01000010" // /* MW 4 */
+ 12885 "00111100" // /* MW 3 */
+ 12886 "11000010" // /* MW 2 */
+ 12887 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12889 "11111011" // /* MW 5 */
+ 12890 "01010010" // /* MW 4 */
+ 12891 "00111001" // /* MW 3 */
+ 12892 "11000110" // /* MW 2 */
+ 12893 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12895 "10000011" // /* MW 5 */
+ 12896 "01000010" // /* MW 4 */
+ 12897 "00111100" // /* MW 3 */
+ 12898 "11000010" // /* MW 2 */
+ 12899 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12901 "01010001" // /* MW 3 */
+ 12902 "00011110" // /* MW 2 */
+ 12903 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12905 "00110001" // /* MW 3 */
+ 12906 "00011110" // /* MW 2 */
+ 12907 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12909 "00010001" // /* MW 3 */
+ 12910 "00001010" // /* MW 2 */
+ 12911 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12913 "00001010" // /* MW 3 */
+ 12914 "00000110" // /* MW 2 */
+ 12915 "00000111" // /* MW 1 */
+ 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12917 "00000000" // /* MW 1 */
+ 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12919 "00000000" // /* MW 1 */
+ 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12921 "00000000" // /* MW 1 */
+ 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12923 "00000000" // /* MW 1 */
+ 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12925 "00000000" // /* MW 1 */
+ 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12927 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */
+ 12929 "00000001" // /* MW 5 */
+ 12930 "00000000" // /* MW 4 */
+ 12931 "01010000" // /* MW 3 */
+ 12932 "00011001" // /* MW 2 */
+ 12933 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12935 "01100000" // /* MW 3 */
+ 12936 "00111011" // /* MW 2 */
+ 12937 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12939 "00000000" // /* MW 5 */
+ 12940 "10100000" // /* MW 4 */
+ 12941 "00001001" // /* MW 3 */
+ 12942 "01111111" // /* MW 2 */
+ 12943 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12949 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12951 "00000001" // /* MW 9 */
+ 12952 "00100110" // /* MW 8 */
+ 12953 "00000000" // /* MW 7 */
+ 12954 "00000000" // /* MW 6 */
+ 12955 "01011011" // /* MW 5 */
+ 12956 "00000001" // /* MW 4 */
+ 12957 "11110000" // /* MW 3 */
+ 12958 "00101100" // /* MW 2 */
+ 12959 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12961 "00010000" // /* MW 9 */
+ 12962 "00110100" // /* MW 8 */
+ 12963 "00110010" // /* MW 7 */
+ 12964 "11110000" // /* MW 6 */
+ 12965 "00000001" // /* MW 5 */
+ 12966 "00000000" // /* MW 4 */
+ 12967 "00100000" // /* MW 3 */
+ 12968 "10000111" // /* MW 2 */
+ 12969 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12971 "11100010" // /* MW 5 */
+ 12972 "00000100" // /* MW 4 */
+ 12973 "01010000" // /* MW 3 */
+ 12974 "11000000" // /* MW 2 */
+ 12975 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12977 "11101001" // /* MW 5 */
+ 12978 "00000010" // /* MW 4 */
+ 12979 "00100001" // /* MW 3 */
+ 12980 "10000011" // /* MW 2 */
+ 12981 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12983 "00100101" // /* MW 5 */
+ 12984 "00000001" // /* MW 4 */
+ 12985 "00100000" // /* MW 3 */
+ 12986 "00111110" // /* MW 2 */
+ 12987 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12989 "00000001" // /* MW 5 */
+ 12990 "00000000" // /* MW 4 */
+ 12991 "00000000" // /* MW 3 */
+ 12992 "11111000" // /* MW 2 */
+ 12993 "11111111" // /* MW 1 */
+ 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12995 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12997 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12999 "00010111" // /* MW 3 */
+ 13000 "00000010" // /* MW 2 */
+ 13001 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13003 "01000001" // /* MW 5 */
+ 13004 "01110000" // /* MW 4 */
+ 13005 "00001111" // /* MW 3 */
+ 13006 "00000000" // /* MW 2 */
+ 13007 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13009 "00010110" // /* MW 3 */
+ 13010 "01000000" // /* MW 2 */
+ 13011 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13013 "11000000" // /* MW 3 */
+ 13014 "01100000" // /* MW 2 */
+ 13015 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13017 "00000001" // /* MW 3 */
+ 13018 "00000001" // /* MW 2 */
+ 13019 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 13023 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13025 "11000000" // /* MW 3 */
+ 13026 "01010110" // /* MW 2 */
+ 13027 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13029 "10101001" // /* MW 5 */
+ 13030 "00000001" // /* MW 4 */
+ 13031 "11011110" // /* MW 3 */
+ 13032 "10010011" // /* MW 2 */
+ 13033 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13035 "00000010" // /* MW 5 */
+ 13036 "11010001" // /* MW 4 */
+ 13037 "11010110" // /* MW 3 */
+ 13038 "10000011" // /* MW 2 */
+ 13039 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13041 "10001010" // /* MW 3 */
+ 13042 "11101000" // /* MW 2 */
+ 13043 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13045 "01000110" // /* MW 3 */
+ 13046 "11111101" // /* MW 2 */
+ 13047 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13049 "00100110" // /* MW 3 */
+ 13050 "00111101" // /* MW 2 */
+ 13051 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13053 "01000110" // /* MW 3 */
+ 13054 "11111111" // /* MW 2 */
+ 13055 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13057 "00100110" // /* MW 3 */
+ 13058 "00101111" // /* MW 2 */
+ 13059 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13061 "00000110" // /* MW 3 */
+ 13062 "00101101" // /* MW 2 */
+ 13063 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13065 "01000110" // /* MW 3 */
+ 13066 "11111100" // /* MW 2 */
+ 13067 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13069 "00100110" // /* MW 3 */
+ 13070 "00111100" // /* MW 2 */
+ 13071 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13073 "01000110" // /* MW 3 */
+ 13074 "11111110" // /* MW 2 */
+ 13075 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13077 "00100110" // /* MW 3 */
+ 13078 "00101110" // /* MW 2 */
+ 13079 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13081 "00000110" // /* MW 3 */
+ 13082 "00101100" // /* MW 2 */
+ 13083 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13085 "11000110" // /* MW 3 */
+ 13086 "11111100" // /* MW 2 */
+ 13087 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13089 "10100110" // /* MW 3 */
+ 13090 "00111100" // /* MW 2 */
+ 13091 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13093 "11000110" // /* MW 3 */
+ 13094 "11111110" // /* MW 2 */
+ 13095 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13097 "10100110" // /* MW 3 */
+ 13098 "00101110" // /* MW 2 */
+ 13099 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13101 "10000110" // /* MW 3 */
+ 13102 "00101100" // /* MW 2 */
+ 13103 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13105 "11000110" // /* MW 3 */
+ 13106 "11111111" // /* MW 2 */
+ 13107 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13109 "10100110" // /* MW 3 */
+ 13110 "00101111" // /* MW 2 */
+ 13111 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13113 "00010000" // /* MW 9 */
+ 13114 "00110100" // /* MW 8 */
+ 13115 "00110010" // /* MW 7 */
+ 13116 "11110010" // /* MW 6 */
+ 13117 "00000001" // /* MW 5 */
+ 13118 "00000000" // /* MW 4 */
+ 13119 "11010000" // /* MW 3 */
+ 13120 "11110000" // /* MW 2 */
+ 13121 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13123 "10000001" // /* MW 5 */
+ 13124 "11000101" // /* MW 4 */
+ 13125 "01011000" // /* MW 3 */
+ 13126 "10011000" // /* MW 2 */
+ 13127 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13129 "00010000" // /* MW 3 */
+ 13130 "00001111" // /* MW 2 */
+ 13131 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13133 "01011000" // /* MW 11 */
+ 13134 "00000000" // /* MW 10 */
+ 13135 "01100000" // /* MW 9 */
+ 13136 "01101010" // /* MW 8 */
+ 13137 "00100000" // /* MW 7 */
+ 13138 "00000000" // /* MW 6 */
+ 13139 "01101000" // /* MW 5 */
+ 13140 "00111011" // /* MW 4 */
+ 13141 "01110000" // /* MW 3 */
+ 13142 "10000101" // /* MW 2 */
+ 13143 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 13145 "01100000" // /* MW 13 */
+ 13146 "00001001" // /* MW 12 */
+ 13147 "01100010" // /* MW 11 */
+ 13148 "00001011" // /* MW 10 */
+ 13149 "00010000" // /* MW 9 */
+ 13150 "11100000" // /* MW 8 */
+ 13151 "00101101" // /* MW 7 */
+ 13152 "00000100" // /* MW 6 */
+ 13153 "11101001" // /* MW 5 */
+ 13154 "00111000" // /* MW 4 */
+ 13155 "11010000" // /* MW 3 */
+ 13156 "10111000" // /* MW 2 */
+ 13157 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13159 "01110010" // /* MW 9 */
+ 13160 "10010000" // /* MW 8 */
+ 13161 "10000000" // /* MW 7 */
+ 13162 "00000010" // /* MW 6 */
+ 13163 "01001011" // /* MW 5 */
+ 13164 "00001100" // /* MW 4 */
+ 13165 "11010001" // /* MW 3 */
+ 13166 "10110100" // /* MW 2 */
+ 13167 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13169 "01111110" // /* MW 9 */
+ 13170 "11000000" // /* MW 8 */
+ 13171 "11100001" // /* MW 7 */
+ 13172 "00000011" // /* MW 6 */
+ 13173 "10010000" // /* MW 5 */
+ 13174 "10101011" // /* MW 4 */
+ 13175 "11010001" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13179 "01011110" // /* MW 9 */
+ 13180 "10010000" // /* MW 8 */
+ 13181 "00000111" // /* MW 7 */
+ 13182 "00000010" // /* MW 6 */
+ 13183 "11110100" // /* MW 5 */
+ 13184 "11110000" // /* MW 4 */
+ 13185 "11010001" // /* MW 3 */
+ 13186 "00001010" // /* MW 2 */
+ 13187 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13189 "10000010" // /* MW 5 */
+ 13190 "00000000" // /* MW 4 */
+ 13191 "01010000" // /* MW 3 */
+ 13192 "00011110" // /* MW 2 */
+ 13193 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13195 "00010000" // /* MW 11 */
+ 13196 "11111000" // /* MW 10 */
+ 13197 "01111001" // /* MW 9 */
+ 13198 "00001100" // /* MW 8 */
+ 13199 "00000000" // /* MW 7 */
+ 13200 "00000000" // /* MW 6 */
+ 13201 "01001011" // /* MW 5 */
+ 13202 "00010000" // /* MW 4 */
+ 13203 "11010110" // /* MW 3 */
+ 13204 "11000000" // /* MW 2 */
+ 13205 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13207 "00010000" // /* MW 11 */
+ 13208 "00101000" // /* MW 10 */
+ 13209 "10111010" // /* MW 9 */
+ 13210 "00001101" // /* MW 8 */
+ 13211 "00000000" // /* MW 7 */
+ 13212 "00000000" // /* MW 6 */
+ 13213 "01001011" // /* MW 5 */
+ 13214 "00010000" // /* MW 4 */
+ 13215 "11010010" // /* MW 3 */
+ 13216 "10010010" // /* MW 2 */
+ 13217 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13219 "00000101" // /* MW 5 */
+ 13220 "01100001" // /* MW 4 */
+ 13221 "10000100" // /* MW 3 */
+ 13222 "00010110" // /* MW 2 */
+ 13223 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13225 "10001010" // /* MW 3 */
+ 13226 "00000000" // /* MW 2 */
+ 13227 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13229 "00010000" // /* MW 9 */
+ 13230 "01001000" // /* MW 8 */
+ 13231 "10110010" // /* MW 7 */
+ 13232 "00001101" // /* MW 6 */
+ 13233 "00000000" // /* MW 5 */
+ 13234 "00000000" // /* MW 4 */
+ 13235 "11010000" // /* MW 3 */
+ 13236 "10010110" // /* MW 2 */
+ 13237 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13239 "10101000" // /* MW 9 */
+ 13240 "00000001" // /* MW 8 */
+ 13241 "10001110" // /* MW 7 */
+ 13242 "00001010" // /* MW 6 */
+ 13243 "00010100" // /* MW 5 */
+ 13244 "00000000" // /* MW 4 */
+ 13245 "11110000" // /* MW 3 */
+ 13246 "00101100" // /* MW 2 */
+ 13247 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13249 "00000000" // /* MW 15 */
+ 13250 "00000000" // /* MW 14 */
+ 13251 "01111000" // /* MW 13 */
+ 13252 "10111001" // /* MW 12 */
+ 13253 "00001110" // /* MW 11 */
+ 13254 "00001000" // /* MW 10 */
+ 13255 "00110110" // /* MW 9 */
+ 13256 "00000000" // /* MW 8 */
+ 13257 "01011011" // /* MW 7 */
+ 13258 "00000001" // /* MW 6 */
+ 13259 "00100000" // /* MW 5 */
+ 13260 "00000000" // /* MW 4 */
+ 13261 "00000000" // /* MW 3 */
+ 13262 "10010001" // /* MW 2 */
+ 13263 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13265 "01101010" // /* MW 15 */
+ 13266 "01100011" // /* MW 14 */
+ 13267 "10101100" // /* MW 13 */
+ 13268 "00000011" // /* MW 12 */
+ 13269 "00001110" // /* MW 11 */
+ 13270 "00000010" // /* MW 10 */
+ 13271 "11010100" // /* MW 9 */
+ 13272 "00001101" // /* MW 8 */
+ 13273 "01001011" // /* MW 7 */
+ 13274 "00010000" // /* MW 6 */
+ 13275 "00100000" // /* MW 5 */
+ 13276 "00000000" // /* MW 4 */
+ 13277 "11110000" // /* MW 3 */
+ 13278 "00101100" // /* MW 2 */
+ 13279 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13281 "00011010" // /* MW 15 */
+ 13282 "01001000" // /* MW 14 */
+ 13283 "11001100" // /* MW 13 */
+ 13284 "00111111" // /* MW 12 */
+ 13285 "10111001" // /* MW 11 */
+ 13286 "11011010" // /* MW 10 */
+ 13287 "00101111" // /* MW 9 */
+ 13288 "00000100" // /* MW 8 */
+ 13289 "01001011" // /* MW 7 */
+ 13290 "00010000" // /* MW 6 */
+ 13291 "00100101" // /* MW 5 */
+ 13292 "00000000" // /* MW 4 */
+ 13293 "11010000" // /* MW 3 */
+ 13294 "10100011" // /* MW 2 */
+ 13295 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13367 "01101110" // /* MW 9 */
+ 13368 "01000001" // /* MW 8 */
+ 13369 "00011000" // /* MW 7 */
+ 13370 "00000001" // /* MW 6 */
+ 13371 "00010000" // /* MW 5 */
+ 13372 "00000000" // /* MW 4 */
+ 13373 "11110000" // /* MW 3 */
+ 13374 "00101100" // /* MW 2 */
+ 13375 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "01101010" // /* MW 15 */
+ 13378 "01100011" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13393 "00011010" // /* MW 15 */
+ 13394 "01001000" // /* MW 14 */
+ 13395 "01111100" // /* MW 13 */
+ 13396 "10100101" // /* MW 12 */
+ 13397 "00000001" // /* MW 11 */
+ 13398 "00000000" // /* MW 10 */
+ 13399 "00000000" // /* MW 9 */
+ 13400 "00000000" // /* MW 8 */
+ 13401 "01011011" // /* MW 7 */
+ 13402 "00000001" // /* MW 6 */
+ 13403 "00100000" // /* MW 5 */
+ 13404 "00000000" // /* MW 4 */
+ 13405 "11110000" // /* MW 3 */
+ 13406 "00101100" // /* MW 2 */
+ 13407 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13409 "01101110" // /* MW 9 */
+ 13410 "10000001" // /* MW 8 */
+ 13411 "10000100" // /* MW 7 */
+ 13412 "00000010" // /* MW 6 */
+ 13413 "10010000" // /* MW 5 */
+ 13414 "01110011" // /* MW 4 */
+ 13415 "11110100" // /* MW 3 */
+ 13416 "00001100" // /* MW 2 */
+ 13417 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13419 "00000001" // /* MW 7 */
+ 13420 "10001001" // /* MW 6 */
+ 13421 "10001010" // /* MW 5 */
+ 13422 "01000110" // /* MW 4 */
+ 13423 "00001011" // /* MW 3 */
+ 13424 "10011100" // /* MW 2 */
+ 13425 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13427 "00000001" // /* MW 7 */
+ 13428 "00110101" // /* MW 6 */
+ 13429 "10001001" // /* MW 5 */
+ 13430 "11000110" // /* MW 4 */
+ 13431 "10000110" // /* MW 3 */
+ 13432 "00110000" // /* MW 2 */
+ 13433 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13435 "00000110" // /* MW 3 */
+ 13436 "10001001" // /* MW 2 */
+ 13437 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13439 "10100001" // /* MW 7 */
+ 13440 "01001000" // /* MW 6 */
+ 13441 "10001100" // /* MW 5 */
+ 13442 "01000110" // /* MW 4 */
+ 13443 "00001111" // /* MW 3 */
+ 13444 "10011100" // /* MW 2 */
+ 13445 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13447 "10100001" // /* MW 9 */
+ 13448 "00110110" // /* MW 8 */
+ 13449 "10001010" // /* MW 7 */
+ 13450 "11000010" // /* MW 6 */
+ 13451 "10001110" // /* MW 5 */
+ 13452 "10110000" // /* MW 4 */
+ 13453 "11110100" // /* MW 3 */
+ 13454 "00101100" // /* MW 2 */
+ 13455 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13457 "00011101" // /* MW 5 */
+ 13458 "00010010" // /* MW 4 */
+ 13459 "10001011" // /* MW 3 */
+ 13460 "00011110" // /* MW 2 */
+ 13461 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13463 "11100001" // /* MW 9 */
+ 13464 "10010010" // /* MW 8 */
+ 13465 "10001011" // /* MW 7 */
+ 13466 "00000010" // /* MW 6 */
+ 13467 "01010100" // /* MW 5 */
+ 13468 "10110111" // /* MW 4 */
+ 13469 "00000001" // /* MW 3 */
+ 13470 "00000000" // /* MW 2 */
+ 13471 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13473 "11100001" // /* MW 11 */
+ 13474 "01010110" // /* MW 10 */
+ 13475 "10001000" // /* MW 9 */
+ 13476 "00000010" // /* MW 8 */
+ 13477 "01001111" // /* MW 7 */
+ 13478 "10001111" // /* MW 6 */
+ 13479 "00000001" // /* MW 5 */
+ 13480 "00000000" // /* MW 4 */
+ 13481 "01110000" // /* MW 3 */
+ 13482 "10000101" // /* MW 2 */
+ 13483 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13485 "01111111" // /* MW 3 */
+ 13486 "01110010" // /* MW 2 */
+ 13487 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13489 "10011011" // /* MW 3 */
+ 13490 "00011101" // /* MW 2 */
+ 13491 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13493 "01110100" // /* MW 3 */
+ 13494 "00011100" // /* MW 2 */
+ 13495 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13497 "10110100" // /* MW 3 */
+ 13498 "01011000" // /* MW 2 */
+ 13499 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13501 "10010110" // /* MW 3 */
+ 13502 "00010001" // /* MW 2 */
+ 13503 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13505 "00010110" // /* MW 3 */
+ 13506 "00010000" // /* MW 2 */
+ 13507 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13509 "01101100" // /* MW 3 */
+ 13510 "01010000" // /* MW 2 */
+ 13511 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13513 "00010100" // /* MW 3 */
+ 13514 "01010011" // /* MW 2 */
+ 13515 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13517 "01110000" // /* MW 7 */
+ 13518 "00110110" // /* MW 6 */
+ 13519 "10101000" // /* MW 5 */
+ 13520 "00000010" // /* MW 4 */
+ 13521 "01100000" // /* MW 3 */
+ 13522 "01000010" // /* MW 2 */
+ 13523 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13525 "00000011" // /* MW 3 */
+ 13526 "00011100" // /* MW 2 */
+ 13527 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13529 "01110000" // /* MW 7 */
+ 13530 "01000101" // /* MW 6 */
+ 13531 "10000000" // /* MW 5 */
+ 13532 "00000001" // /* MW 4 */
+ 13533 "01100000" // /* MW 3 */
+ 13534 "01010010" // /* MW 2 */
+ 13535 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13537 "01000001" // /* MW 7 */
+ 13538 "01101101" // /* MW 6 */
+ 13539 "10001100" // /* MW 5 */
+ 13540 "01000110" // /* MW 4 */
+ 13541 "00000111" // /* MW 3 */
+ 13542 "00011100" // /* MW 2 */
+ 13543 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13545 "01000001" // /* MW 7 */
+ 13546 "00000011" // /* MW 6 */
+ 13547 "10001001" // /* MW 5 */
+ 13548 "11000110" // /* MW 4 */
+ 13549 "10000010" // /* MW 3 */
+ 13550 "00110000" // /* MW 2 */
+ 13551 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13553 "01101110" // /* MW 9 */
+ 13554 "10000001" // /* MW 8 */
+ 13555 "10000100" // /* MW 7 */
+ 13556 "00000010" // /* MW 6 */
+ 13557 "11110100" // /* MW 5 */
+ 13558 "11110000" // /* MW 4 */
+ 13559 "01110001" // /* MW 3 */
+ 13560 "10110011" // /* MW 2 */
+ 13561 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13563 "00000001" // /* MW 9 */
+ 13564 "10001001" // /* MW 8 */
+ 13565 "10001010" // /* MW 7 */
+ 13566 "01000110" // /* MW 6 */
+ 13567 "00001011" // /* MW 5 */
+ 13568 "10011100" // /* MW 4 */
+ 13569 "11101010" // /* MW 3 */
+ 13570 "00111000" // /* MW 2 */
+ 13571 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13573 "00000001" // /* MW 9 */
+ 13574 "00110101" // /* MW 8 */
+ 13575 "10001001" // /* MW 7 */
+ 13576 "11000110" // /* MW 6 */
+ 13577 "10000110" // /* MW 5 */
+ 13578 "00110000" // /* MW 4 */
+ 13579 "01101010" // /* MW 3 */
+ 13580 "10110001" // /* MW 2 */
+ 13581 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13583 "00000110" // /* MW 3 */
+ 13584 "10001001" // /* MW 2 */
+ 13585 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13587 "10100001" // /* MW 7 */
+ 13588 "01001000" // /* MW 6 */
+ 13589 "10001100" // /* MW 5 */
+ 13590 "11000110" // /* MW 4 */
+ 13591 "10001110" // /* MW 3 */
+ 13592 "10110000" // /* MW 2 */
+ 13593 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13595 "10100001" // /* MW 7 */
+ 13596 "00110110" // /* MW 6 */
+ 13597 "10001010" // /* MW 5 */
+ 13598 "01000110" // /* MW 4 */
+ 13599 "00001111" // /* MW 3 */
+ 13600 "10011100" // /* MW 2 */
+ 13601 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13603 "00001110" // /* MW 3 */
+ 13604 "10001001" // /* MW 2 */
+ 13605 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13607 "11100001" // /* MW 7 */
+ 13608 "10010010" // /* MW 6 */
+ 13609 "10001011" // /* MW 5 */
+ 13610 "01000110" // /* MW 4 */
+ 13611 "00000011" // /* MW 3 */
+ 13612 "00011100" // /* MW 2 */
+ 13613 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13615 "11100001" // /* MW 7 */
+ 13616 "01010110" // /* MW 6 */
+ 13617 "10001000" // /* MW 5 */
+ 13618 "01000110" // /* MW 4 */
+ 13619 "00000111" // /* MW 3 */
+ 13620 "00011100" // /* MW 2 */
+ 13621 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13623 "00000101" // /* MW 5 */
+ 13624 "01100001" // /* MW 4 */
+ 13625 "11110100" // /* MW 3 */
+ 13626 "00101100" // /* MW 2 */
+ 13627 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13629 "01000001" // /* MW 3 */
+ 13630 "01101101" // /* MW 2 */
+ 13631 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13633 "00011010" // /* MW 15 */
+ 13634 "01001000" // /* MW 14 */
+ 13635 "01111100" // /* MW 13 */
+ 13636 "10100101" // /* MW 12 */
+ 13637 "00000001" // /* MW 11 */
+ 13638 "00000000" // /* MW 10 */
+ 13639 "00000000" // /* MW 9 */
+ 13640 "00000000" // /* MW 8 */
+ 13641 "01011011" // /* MW 7 */
+ 13642 "00000001" // /* MW 6 */
+ 13643 "00100000" // /* MW 5 */
+ 13644 "00000000" // /* MW 4 */
+ 13645 "11110000" // /* MW 3 */
+ 13646 "00101100" // /* MW 2 */
+ 13647 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13649 "01101000" // /* MW 11 */
+ 13650 "10000001" // /* MW 10 */
+ 13651 "10000100" // /* MW 9 */
+ 13652 "00000010" // /* MW 8 */
+ 13653 "00100111" // /* MW 7 */
+ 13654 "00000100" // /* MW 6 */
+ 13655 "00100000" // /* MW 5 */
+ 13656 "11100111" // /* MW 4 */
+ 13657 "11111000" // /* MW 3 */
+ 13658 "00001100" // /* MW 2 */
+ 13659 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13661 "00000001" // /* MW 7 */
+ 13662 "10001001" // /* MW 6 */
+ 13663 "10001010" // /* MW 5 */
+ 13664 "01000110" // /* MW 4 */
+ 13665 "00001011" // /* MW 3 */
+ 13666 "10011100" // /* MW 2 */
+ 13667 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13669 "00000001" // /* MW 7 */
+ 13670 "00110101" // /* MW 6 */
+ 13671 "10001001" // /* MW 5 */
+ 13672 "11000110" // /* MW 4 */
+ 13673 "10000110" // /* MW 3 */
+ 13674 "00110000" // /* MW 2 */
+ 13675 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00000110" // /* MW 3 */
+ 13678 "10001001" // /* MW 2 */
+ 13679 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13681 "10100001" // /* MW 7 */
+ 13682 "01001000" // /* MW 6 */
+ 13683 "10001100" // /* MW 5 */
+ 13684 "01000110" // /* MW 4 */
+ 13685 "00001111" // /* MW 3 */
+ 13686 "10011100" // /* MW 2 */
+ 13687 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13689 "10100001" // /* MW 7 */
+ 13690 "00110110" // /* MW 6 */
+ 13691 "10001010" // /* MW 5 */
+ 13692 "11000110" // /* MW 4 */
+ 13693 "10001110" // /* MW 3 */
+ 13694 "10110000" // /* MW 2 */
+ 13695 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "00001110" // /* MW 3 */
+ 13698 "10001001" // /* MW 2 */
+ 13699 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13701 "11100001" // /* MW 3 */
+ 13702 "10010010" // /* MW 2 */
+ 13703 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13705 "11100001" // /* MW 3 */
+ 13706 "01010110" // /* MW 2 */
+ 13707 "10001000" // /* MW 1 */
+ 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13709 "00000000" // /* MW 1 */
+ 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13711 "00000000" // /* MW 1 */
+ 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13713 "00000000" // /* MW 1 */
+ 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13715 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13717 "10010110" // /* MW 3 */
+ 13718 "00010001" // /* MW 2 */
+ 13719 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13721 "00000000" // /* MW 5 */
+ 13722 "01010000" // /* MW 4 */
+ 13723 "11000000" // /* MW 3 */
+ 13724 "00000010" // /* MW 2 */
+ 13725 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13727 "01101100" // /* MW 3 */
+ 13728 "01010000" // /* MW 2 */
+ 13729 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13731 "00010100" // /* MW 3 */
+ 13732 "01010011" // /* MW 2 */
+ 13733 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13735 "01101100" // /* MW 3 */
+ 13736 "01010000" // /* MW 2 */
+ 13737 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13739 "00010011" // /* MW 3 */
+ 13740 "10001010" // /* MW 2 */
+ 13741 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13743 "10010011" // /* MW 3 */
+ 13744 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13745 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13761 "10000000" // /* MW 5 */
+ 13762 "11001000" // /* MW 4 */
+ 13763 "11001000" // /* MW 3 */
+ 13764 "00000111" // /* MW 2 */
+ 13765 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13767 "01000001" // /* MW 5 */
+ 13768 "00101111" // /* MW 4 */
+ 13769 "11010000" // /* MW 3 */
+ 13770 "11000010" // /* MW 2 */
+ 13771 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13773 "00000001" // /* MW 5 */
+ 13774 "00000000" // /* MW 4 */
+ 13775 "00000000" // /* MW 3 */
+ 13776 "00010000" // /* MW 2 */
+ 13777 "00000000" // /* MW 1 */
+ 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13779 "01110000" // /* MW 7 */
+ 13780 "01110000" // /* MW 6 */
+ 13781 "00101101" // /* MW 5 */
+ 13782 "00000010" // /* MW 4 */
+ 13783 "10110000" // /* MW 3 */
+ 13784 "00111010" // /* MW 2 */
+ 13785 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13787 "01110000" // /* MW 7 */
+ 13788 "11110000" // /* MW 6 */
+ 13789 "10101000" // /* MW 5 */
+ 13790 "00000001" // /* MW 4 */
+ 13791 "10110000" // /* MW 3 */
+ 13792 "10110110" // /* MW 2 */
+ 13793 "11111111" // /* MW 1 */
+ 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "00011101" // /* MW 3 */
+ 13796 "11101100" // /* MW 2 */
+ 13797 "00001111" // /* MW 1 */
+ 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13799 "10011101" // /* MW 3 */
+ 13800 "11110111" // /* MW 2 */
+ 13801 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13803 "01110000" // /* MW 7 */
+ 13804 "01100000" // /* MW 6 */
+ 13805 "11001010" // /* MW 5 */
+ 13806 "00000001" // /* MW 4 */
+ 13807 "10110000" // /* MW 3 */
+ 13808 "00000010" // /* MW 2 */
+ 13809 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */
+ 13811 "00000001" // /* MW 5 */
+ 13812 "01000000" // /* MW 4 */
+ 13813 "00111000" // /* MW 3 */
+ 13814 "00011011" // /* MW 2 */
+ 13815 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13817 "11000000" // /* MW 3 */
+ 13818 "11010110" // /* MW 2 */
+ 13819 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13821 "10010000" // /* MW 3 */
+ 13822 "01100010" // /* MW 2 */
+ 13823 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13825 "11111011" // /* MW 3 */
+ 13826 "01100011" // /* MW 2 */
+ 13827 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13829 "10100000" // /* MW 5 */
+ 13830 "11001000" // /* MW 4 */
+ 13831 "11000110" // /* MW 3 */
+ 13832 "00000111" // /* MW 2 */
+ 13833 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13835 "00110001" // /* MW 3 */
+ 13836 "00000110" // /* MW 2 */
+ 13837 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13839 "00010001" // /* MW 9 */
+ 13840 "00110100" // /* MW 8 */
+ 13841 "10110010" // /* MW 7 */
+ 13842 "11110000" // /* MW 6 */
+ 13843 "00000001" // /* MW 5 */
+ 13844 "00000000" // /* MW 4 */
+ 13845 "01100000" // /* MW 3 */
+ 13846 "10010001" // /* MW 2 */
+ 13847 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13849 "00010000" // /* MW 11 */
+ 13850 "00110010" // /* MW 10 */
+ 13851 "10110010" // /* MW 9 */
+ 13852 "11110000" // /* MW 8 */
+ 13853 "00000001" // /* MW 7 */
+ 13854 "00000000" // /* MW 6 */
+ 13855 "10001011" // /* MW 5 */
+ 13856 "10001000" // /* MW 4 */
+ 13857 "11100000" // /* MW 3 */
+ 13858 "11000000" // /* MW 2 */
+ 13859 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13861 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */
+ 13863 "00000001" // /* MW 5 */
+ 13864 "00000000" // /* MW 4 */
+ 13865 "00100000" // /* MW 3 */
+ 13866 "00011000" // /* MW 2 */
+ 13867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13873 "00110001" // /* MW 3 */
+ 13874 "00100000" // /* MW 2 */
+ 13875 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "00000101" // /* MW 3 */
+ 13878 "00100000" // /* MW 2 */
+ 13879 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13881 "01110000" // /* MW 7 */
+ 13882 "10100101" // /* MW 6 */
+ 13883 "00000001" // /* MW 5 */
+ 13884 "00000000" // /* MW 4 */
+ 13885 "00110000" // /* MW 3 */
+ 13886 "11000010" // /* MW 2 */
+ 13887 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13889 "00000000" // /* MW 7 */
+ 13890 "10000010" // /* MW 6 */
+ 13891 "00110011" // /* MW 5 */
+ 13892 "00000001" // /* MW 4 */
+ 13893 "01100000" // /* MW 3 */
+ 13894 "10010001" // /* MW 2 */
+ 13895 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13897 "00111010" // /* MW 3 */
+ 13898 "00000110" // /* MW 2 */
+ 13899 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13901 "00010000" // /* MW 9 */
+ 13902 "00110000" // /* MW 8 */
+ 13903 "00110010" // /* MW 7 */
+ 13904 "11110001" // /* MW 6 */
+ 13905 "00000001" // /* MW 5 */
+ 13906 "00000000" // /* MW 4 */
+ 13907 "01010000" // /* MW 3 */
+ 13908 "11000011" // /* MW 2 */
+ 13909 "01000100" // /* MW 1 */
+ 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13911 "00000000" // /* MW 1 */
+ 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */
+ 13913 "00000000" // /* MW 5 */
+ 13914 "00000000" // /* MW 4 */
+ 13915 "01000000" // /* MW 3 */
+ 13916 "00011011" // /* MW 2 */
+ 13917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13919 "10110000" // /* MW 5 */
+ 13920 "11001000" // /* MW 4 */
+ 13921 "11000110" // /* MW 3 */
+ 13922 "00000111" // /* MW 2 */
+ 13923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13927 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00110001" // /* MW 3 */
+ 13930 "00000110" // /* MW 2 */
+ 13931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13933 "00010001" // /* MW 3 */
+ 13934 "00000110" // /* MW 2 */
+ 13935 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13937 "00000000" // /* MW 15 */
+ 13938 "00000000" // /* MW 14 */
+ 13939 "00010000" // /* MW 13 */
+ 13940 "00101100" // /* MW 12 */
+ 13941 "10110010" // /* MW 11 */
+ 13942 "11110001" // /* MW 10 */
+ 13943 "00000001" // /* MW 9 */
+ 13944 "00000000" // /* MW 8 */
+ 13945 "01011011" // /* MW 7 */
+ 13946 "00000001" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13953 "10000110" // /* MW 3 */
+ 13954 "01100111" // /* MW 2 */
+ 13955 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13957 "00010000" // /* MW 9 */
+ 13958 "00101000" // /* MW 8 */
+ 13959 "00110010" // /* MW 7 */
+ 13960 "11110010" // /* MW 6 */
+ 13961 "00000001" // /* MW 5 */
+ 13962 "00000000" // /* MW 4 */
+ 13963 "11010000" // /* MW 3 */
+ 13964 "11101110" // /* MW 2 */
+ 13965 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13967 "00010110" // /* MW 3 */
+ 13968 "11111110" // /* MW 2 */
+ 13969 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13971 "00110110" // /* MW 3 */
+ 13972 "11111110" // /* MW 2 */
+ 13973 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13975 "01010110" // /* MW 3 */
+ 13976 "01000110" // /* MW 2 */
+ 13977 "00000010" // /* MW 1 */
+ 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13979 "00000000" // /* MW 1 */
+ 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13981 "00000000" // /* MW 1 */
+ 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13983 "00000000" // /* MW 1 */
+ 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13985 "00000000" // /* MW 1 */
+ 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13987 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00000010" // /* MW 3 */
+ 13990 "01100001" // /* MW 2 */
+ 13991 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "00010001" // /* MW 3 */
+ 13994 "00000110" // /* MW 2 */
+ 13995 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "11111101" // /* MW 3 */
+ 13998 "11100000" // /* MW 2 */
+ 13999 "00010111" // /* MW 1 */
+ 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14001 "00000000" // /* MW 1 */
+ 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14003 "00000000" // /* MW 1 */
+ 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14005 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14007 "00001000" // /* MW 3 */
+ 14008 "10010011" // /* MW 2 */
+ 14009 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14011 "00010000" // /* MW 9 */
+ 14012 "00100000" // /* MW 8 */
+ 14013 "10110010" // /* MW 7 */
+ 14014 "11110011" // /* MW 6 */
+ 14015 "00000001" // /* MW 5 */
+ 14016 "00000000" // /* MW 4 */
+ 14017 "00000000" // /* MW 3 */
+ 14018 "00101111" // /* MW 2 */
+ 14019 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14021 "11000001" // /* MW 5 */
+ 14022 "00101011" // /* MW 4 */
+ 14023 "00101000" // /* MW 3 */
+ 14024 "00000000" // /* MW 2 */
+ 14025 "00000110" // /* MW 1 */
+ 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14027 "01011010" // /* MW 3 */
+ 14028 "01101000" // /* MW 2 */
+ 14029 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14031 "10000001" // /* MW 5 */
+ 14032 "00101001" // /* MW 4 */
+ 14033 "00100111" // /* MW 3 */
+ 14034 "11010011" // /* MW 2 */
+ 14035 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00110110" // /* MW 3 */
+ 14038 "00000110" // /* MW 2 */
+ 14039 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14041 "00010000" // /* MW 9 */
+ 14042 "11100000" // /* MW 8 */
+ 14043 "10110011" // /* MW 7 */
+ 14044 "11110001" // /* MW 6 */
+ 14045 "00000001" // /* MW 5 */
+ 14046 "00000000" // /* MW 4 */
+ 14047 "11010000" // /* MW 3 */
+ 14048 "11000010" // /* MW 2 */
+ 14049 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14051 "01010110" // /* MW 3 */
+ 14052 "00000110" // /* MW 2 */
+ 14053 "00000111" // /* MW 1 */
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14061 "01110110" // /* MW 3 */
+ 14062 "00000110" // /* MW 2 */
+ 14063 "00000101" // /* MW 1 */
+ 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14067 "00001111" // /* MW 3 */
+ 14068 "01100001" // /* MW 2 */
+ 14069 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14071 "00000111" // /* MW 3 */
+ 14072 "10100010" // /* MW 2 */
+ 14073 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14075 "11111101" // /* MW 3 */
+ 14076 "00100000" // /* MW 2 */
+ 14077 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */
+ 14079 "00000001" // /* MW 5 */
+ 14080 "00000000" // /* MW 4 */
+ 14081 "01110000" // /* MW 3 */
+ 14082 "00011001" // /* MW 2 */
+ 14083 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14085 "00110001" // /* MW 3 */
+ 14086 "00000110" // /* MW 2 */
+ 14087 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14089 "11000001" // /* MW 3 */
+ 14090 "01001001" // /* MW 2 */
+ 14091 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14093 "00100101" // /* MW 3 */
+ 14094 "10110100" // /* MW 2 */
+ 14095 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "00010101" // /* MW 3 */
+ 14098 "10111011" // /* MW 2 */
+ 14099 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14101 "11000001" // /* MW 11 */
+ 14102 "10001010" // /* MW 10 */
+ 14103 "11011111" // /* MW 9 */
+ 14104 "00000011" // /* MW 8 */
+ 14105 "00000000" // /* MW 7 */
+ 14106 "00000000" // /* MW 6 */
+ 14107 "00100000" // /* MW 5 */
+ 14108 "00000000" // /* MW 4 */
+ 14109 "11110000" // /* MW 3 */
+ 14110 "00101100" // /* MW 2 */
+ 14111 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14113 "00001010" // /* MW 3 */
+ 14114 "01100111" // /* MW 2 */
+ 14115 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14117 "00010110" // /* MW 3 */
+ 14118 "00000110" // /* MW 2 */
+ 14119 "00000010" // /* MW 1 */
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14127 "00000000" // /* MW 1 */
+ 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14129 "00000000" // /* MW 1 */
+ 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14131 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "11111000" // /* MW 3 */
+ 14134 "00010000" // /* MW 2 */
+ 14135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14137 "00010000" // /* MW 9 */
+ 14138 "00110000" // /* MW 8 */
+ 14139 "10110010" // /* MW 7 */
+ 14140 "11110000" // /* MW 6 */
+ 14141 "00000001" // /* MW 5 */
+ 14142 "00000000" // /* MW 4 */
+ 14143 "11010000" // /* MW 3 */
+ 14144 "11000010" // /* MW 2 */
+ 14145 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14147 "01010110" // /* MW 3 */
+ 14148 "00000110" // /* MW 2 */
+ 14149 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14151 "00110110" // /* MW 3 */
+ 14152 "00000110" // /* MW 2 */
+ 14153 "00000111" // /* MW 1 */
+ 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14155 "10011001" // /* MW 3 */
+ 14156 "11110100" // /* MW 2 */
+ 14157 "00000111" // /* MW 1 */
+ 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14159 "11010001" // /* MW 3 */
+ 14160 "11111001" // /* MW 2 */
+ 14161 "00000111" // /* MW 1 */
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14163 "00000000" // /* MW 1 */
+ 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14165 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14167 "00000001" // /* MW 3 */
+ 14168 "11100001" // /* MW 2 */
+ 14169 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14171 "00010001" // /* MW 3 */
+ 14172 "11100110" // /* MW 2 */
+ 14173 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14175 "00101000" // /* MW 3 */
+ 14176 "01100001" // /* MW 2 */
+ 14177 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */
+ 14179 "00000001" // /* MW 5 */
+ 14180 "01000000" // /* MW 4 */
+ 14181 "11000000" // /* MW 3 */
+ 14182 "00011011" // /* MW 2 */
+ 14183 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "00000001" // /* MW 3 */
+ 14186 "00110000" // /* MW 2 */
+ 14187 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14193 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14195 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14197 "11000001" // /* MW 11 */
+ 14198 "10001000" // /* MW 10 */
+ 14199 "10000011" // /* MW 9 */
+ 14200 "00000011" // /* MW 8 */
+ 14201 "00000000" // /* MW 7 */
+ 14202 "00000000" // /* MW 6 */
+ 14203 "00100000" // /* MW 5 */
+ 14204 "00000000" // /* MW 4 */
+ 14205 "11110000" // /* MW 3 */
+ 14206 "00101100" // /* MW 2 */
+ 14207 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14209 "01000001" // /* MW 5 */
+ 14210 "11101101" // /* MW 4 */
+ 14211 "00101110" // /* MW 3 */
+ 14212 "10110110" // /* MW 2 */
+ 14213 "11111111" // /* MW 1 */
+ 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14215 "11110001" // /* MW 3 */
+ 14216 "11110001" // /* MW 2 */
+ 14217 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14219 "00000000" // /* MW 3 */
+ 14220 "00101000" // /* MW 2 */
+ 14221 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14223 "00000001" // /* MW 5 */
+ 14224 "00000000" // /* MW 4 */
+ 14225 "00000000" // /* MW 3 */
+ 14226 "11110000" // /* MW 2 */
+ 14227 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14233 "00000000" // /* MW 1 */
+.delay_slot
+ 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "11000000" // /* MW 3 */
+ 14236 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 14237 "00011111" // /* MW 1 */
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.src_ref 7 "superkernels.cpp" 578
+.src_ref 7 "superkernels.cpp" 578 first
+.function_start
+ 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14241 "00000001" // /* MW 5 */
+ 14242 "00000000" // /* MW 4 */
+ 14243 "00000000" // /* MW 3 */
+ 14244 "00001000" // /* MW 2 */
+ 14245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+ 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14247 "00010001" // /* MW 9 */
+ 14248 "00100000" // /* MW 8 */
+ 14249 "10110010" // /* MW 7 */
+ 14250 "11110011" // /* MW 6 */
+ 14251 "00000001" // /* MW 5 */
+ 14252 "00000000" // /* MW 4 */
+ 14253 "10110000" // /* MW 3 */
+ 14254 "01110011" // /* MW 2 */
+ 14255 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6 first
+ 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14257 "01110010" // /* MW 9 */
+ 14258 "01110000" // /* MW 8 */
+ 14259 "00101101" // /* MW 7 */
+ 14260 "10000010" // /* MW 6 */
+ 14261 "00011101" // /* MW 5 */
+ 14262 "11111111" // /* MW 4 */
+ 14263 "11010111" // /* MW 3 */
+ 14264 "11000010" // /* MW 2 */
+ 14265 "11100000" // /* MW 1 */
+ 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011101" // /* MW 3 */
+ 14268 "11110110" // /* MW 2 */
+ 14269 "00001111" // /* MW 1 */
+ 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011101" // /* MW 3 */
+ 14272 "11110001" // /* MW 2 */
+ 14273 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 599 105
+.src_ref 7 "superkernels.cpp" 629 34
+ 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 14275 "01110000" // /* MW 7 */
+ 14276 "01100000" // /* MW 6 */
+ 14277 "10110011" // /* MW 5 */
+ 14278 "00000011" // /* MW 4 */
+ 14279 "10110000" // /* MW 3 */
+ 14280 "10000111" // /* MW 2 */
+ 14281 "11111101" // /* MW 1 */
+ 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14283 "00000000" // /* MW 1 */
+ 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14285 "00000000" // /* MW 1 */
+ 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+.src_ref 7 "superkernels.cpp" 583 16
+ 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */
+ 14289 "00000001" // /* MW 5 */
+ 14290 "01000000" // /* MW 4 */
+ 14291 "10110000" // /* MW 3 */
+ 14292 "00011100" // /* MW 2 */
+ 14293 "10000000" // /* MW 1 */
+.delay_slot
+ 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011101" // /* MW 3 */
+ 14296 "11101000" // /* MW 2 */
+ 14297 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 22 first
+.delay_slot
+ 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "10010000" // /* MW 3 */
+ 14300 "01100010" // /* MW 2 */
+ 14301 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 30
+.delay_slot
+ 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "11111011" // /* MW 3 */
+ 14304 "01100011" // /* MW 2 */
+ 14305 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14307 "10100000" // /* MW 5 */
+ 14308 "11001000" // /* MW 4 */
+ 14309 "11001100" // /* MW 3 */
+ 14310 "00000111" // /* MW 2 */
+ 14311 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14313 "00110001" // /* MW 3 */
+ 14314 "00000110" // /* MW 2 */
+ 14315 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14317 "00010000" // /* MW 9 */
+ 14318 "00110100" // /* MW 8 */
+ 14319 "00110010" // /* MW 7 */
+ 14320 "11110011" // /* MW 6 */
+ 14321 "00000001" // /* MW 5 */
+ 14322 "00000000" // /* MW 4 */
+ 14323 "00000000" // /* MW 3 */
+ 14324 "00100000" // /* MW 2 */
+ 14325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14327 "00010000" // /* MW 11 */
+ 14328 "00110010" // /* MW 10 */
+ 14329 "00110010" // /* MW 9 */
+ 14330 "11110000" // /* MW 8 */
+ 14331 "00000001" // /* MW 7 */
+ 14332 "00000000" // /* MW 6 */
+ 14333 "10001011" // /* MW 5 */
+ 14334 "10000100" // /* MW 4 */
+ 14335 "11100110" // /* MW 3 */
+ 14336 "11000000" // /* MW 2 */
+ 14337 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 587 4
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14339 "00010000" // /* MW 9 */
+ 14340 "00000000" // /* MW 8 */
+ 14341 "10110011" // /* MW 7 */
+ 14342 "11110000" // /* MW 6 */
+ 14343 "00000001" // /* MW 5 */
+ 14344 "00000000" // /* MW 4 */
+ 14345 "00000000" // /* MW 3 */
+ 14346 "00000001" // /* MW 2 */
+ 14347 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 14349 "00000001" // /* MW 5 */
+ 14350 "00000000" // /* MW 4 */
+ 14351 "01100000" // /* MW 3 */
+ 14352 "00000101" // /* MW 2 */
+ 14353 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14357 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00110001" // /* MW 3 */
+ 14360 "00100000" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14363 "00001010" // /* MW 5 */
+ 14364 "01000000" // /* MW 4 */
+ 14365 "11110000" // /* MW 3 */
+ 14366 "00101100" // /* MW 2 */
+ 14367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14369 "00000000" // /* MW 15 */
+ 14370 "00000000" // /* MW 14 */
+ 14371 "01111000" // /* MW 13 */
+ 14372 "01100000" // /* MW 12 */
+ 14373 "00110111" // /* MW 11 */
+ 14374 "00000000" // /* MW 10 */
+ 14375 "00000000" // /* MW 9 */
+ 14376 "10000000" // /* MW 8 */
+ 14377 "00010001" // /* MW 7 */
+ 14378 "00000110" // /* MW 6 */
+ 14379 "00100000" // /* MW 5 */
+ 14380 "00000000" // /* MW 4 */
+ 14381 "11110000" // /* MW 3 */
+ 14382 "00101100" // /* MW 2 */
+ 14383 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 591 4
+.return_address
+ 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14385 "00000001" // /* MW 5 */
+ 14386 "00000001" // /* MW 4 */
+ 14387 "10100001" // /* MW 3 */
+ 14388 "00000000" // /* MW 2 */
+ 14389 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35 first
+ 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14391 "01010110" // /* MW 3 */
+ 14392 "00000010" // /* MW 2 */
+ 14393 "00000111" // /* MW 1 */
+ 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14395 "00000000" // /* MW 1 */
+ 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14397 "00000000" // /* MW 1 */
+ 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14399 "00000000" // /* MW 1 */
+ 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14401 "00000000" // /* MW 1 */
+ 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14403 "00000000" // /* MW 1 */
+ 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14405 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14407 "00000111" // /* MW 3 */
+ 14408 "10100001" // /* MW 2 */
+ 14409 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4
+ 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */
+ 14411 "00000001" // /* MW 5 */
+ 14412 "01000000" // /* MW 4 */
+ 14413 "01101000" // /* MW 3 */
+ 14414 "00011100" // /* MW 2 */
+ 14415 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105
+.delay_slot
+ 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14417 "11000000" // /* MW 3 */
+ 14418 "01011110" // /* MW 2 */
+ 14419 "00011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105 first
+.delay_slot
+ 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14421 "10010000" // /* MW 3 */
+ 14422 "11001000" // /* MW 2 */
+ 14423 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14427 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14429 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */
+ 14431 "00000001" // /* MW 5 */
+ 14432 "01000000" // /* MW 4 */
+ 14433 "01011000" // /* MW 3 */
+ 14434 "00011100" // /* MW 2 */
+ 14435 "10010000" // /* MW 1 */
+.delay_slot
+ 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14437 "00000000" // /* MW 5 */
+ 14438 "00101100" // /* MW 4 */
+ 14439 "11001000" // /* MW 3 */
+ 14440 "00000111" // /* MW 2 */
+ 14441 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27
+.delay_slot
+ 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14443 "00000001" // /* MW 3 */
+ 14444 "00100010" // /* MW 2 */
+ 14445 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14451 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8 first
+.no_stack_arguments
+ 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */
+ 14453 "01000001" // /* MW 9 */
+ 14454 "00000000" // /* MW 8 */
+ 14455 "00000000" // /* MW 7 */
+ 14456 "01110000" // /* MW 6 */
+ 14457 "00000101" // /* MW 5 */
+ 14458 "00000000" // /* MW 4 */
+ 14459 "10110000" // /* MW 3 */
+ 14460 "11100011" // /* MW 2 */
+ 14461 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38
+.delay_slot
+ 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14463 "10000000" // /* MW 5 */
+ 14464 "11001010" // /* MW 4 */
+ 14465 "11001100" // /* MW 3 */
+ 14466 "00000111" // /* MW 2 */
+ 14467 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14469 "10000000" // /* MW 5 */
+ 14470 "11001010" // /* MW 4 */
+ 14471 "11000000" // /* MW 3 */
+ 14472 "00000111" // /* MW 2 */
+ 14473 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14475 "10000000" // /* MW 3 */
+ 14476 "01100001" // /* MW 2 */
+ 14477 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14481 "00000000" // /* MW 15 */
+ 14482 "00000000" // /* MW 14 */
+ 14483 "01111000" // /* MW 13 */
+ 14484 "10100101" // /* MW 12 */
+ 14485 "00000001" // /* MW 11 */
+ 14486 "00000000" // /* MW 10 */
+ 14487 "00000000" // /* MW 9 */
+ 14488 "00000000" // /* MW 8 */
+ 14489 "01011011" // /* MW 7 */
+ 14490 "00000001" // /* MW 6 */
+ 14491 "00100000" // /* MW 5 */
+ 14492 "00000000" // /* MW 4 */
+ 14493 "11110000" // /* MW 3 */
+ 14494 "00101100" // /* MW 2 */
+ 14495 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38 first
+.return_address
+ 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14497 "00010000" // /* MW 9 */
+ 14498 "00000000" // /* MW 8 */
+ 14499 "00001011" // /* MW 7 */
+ 14500 "11110010" // /* MW 6 */
+ 14501 "00000001" // /* MW 5 */
+ 14502 "00000000" // /* MW 4 */
+ 14503 "11010000" // /* MW 3 */
+ 14504 "11000110" // /* MW 2 */
+ 14505 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14507 "00100000" // /* MW 5 */
+ 14508 "00000000" // /* MW 4 */
+ 14509 "00100000" // /* MW 3 */
+ 14510 "11100011" // /* MW 2 */
+ 14511 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14513 "00000000" // /* MW 1 */
+ 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */
+ 14515 "00000000" // /* MW 5 */
+ 14516 "00000000" // /* MW 4 */
+ 14517 "10000000" // /* MW 3 */
+ 14518 "00011100" // /* MW 2 */
+ 14519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14527 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14529 "00000000" // /* MW 15 */
+ 14530 "00000000" // /* MW 14 */
+ 14531 "01111000" // /* MW 13 */
+ 14532 "01100000" // /* MW 12 */
+ 14533 "10110110" // /* MW 11 */
+ 14534 "00000000" // /* MW 10 */
+ 14535 "00000000" // /* MW 9 */
+ 14536 "00000000" // /* MW 8 */
+ 14537 "01011011" // /* MW 7 */
+ 14538 "00000001" // /* MW 6 */
+ 14539 "00100000" // /* MW 5 */
+ 14540 "00000000" // /* MW 4 */
+ 14541 "11110000" // /* MW 3 */
+ 14542 "00101100" // /* MW 2 */
+ 14543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.src_ref 7 "superkernels.cpp" 599 8 first
+.no_stack_arguments
+ 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 14545 "01000001" // /* MW 9 */
+ 14546 "00000000" // /* MW 8 */
+ 14547 "00000000" // /* MW 7 */
+ 14548 "10000100" // /* MW 6 */
+ 14549 "00000101" // /* MW 5 */
+ 14550 "00000000" // /* MW 4 */
+ 14551 "10110000" // /* MW 3 */
+ 14552 "11100011" // /* MW 2 */
+ 14553 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38
+.delay_slot
+ 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14555 "00000000" // /* MW 5 */
+ 14556 "11001011" // /* MW 4 */
+ 14557 "11001100" // /* MW 3 */
+ 14558 "00000111" // /* MW 2 */
+ 14559 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14561 "00000000" // /* MW 5 */
+ 14562 "11001011" // /* MW 4 */
+ 14563 "11000000" // /* MW 3 */
+ 14564 "00000111" // /* MW 2 */
+ 14565 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14567 "10000000" // /* MW 3 */
+ 14568 "01100001" // /* MW 2 */
+ 14569 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14573 "01100111" // /* MW 3 */
+ 14574 "00000001" // /* MW 2 */
+ 14575 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38 first
+.return_address
+ 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14577 "00010000" // /* MW 9 */
+ 14578 "00000000" // /* MW 8 */
+ 14579 "00001011" // /* MW 7 */
+ 14580 "11110010" // /* MW 6 */
+ 14581 "00000001" // /* MW 5 */
+ 14582 "00000000" // /* MW 4 */
+ 14583 "11010000" // /* MW 3 */
+ 14584 "11000110" // /* MW 2 */
+ 14585 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14587 "00100000" // /* MW 5 */
+ 14588 "00000000" // /* MW 4 */
+ 14589 "00100000" // /* MW 3 */
+ 14590 "10010011" // /* MW 2 */
+ 14591 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14593 "00000101" // /* MW 3 */
+ 14594 "01101000" // /* MW 2 */
+ 14595 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 35 first
+.src_ref 7 "superkernels.cpp" 611 18
+ 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14597 "00010000" // /* MW 9 */
+ 14598 "00101000" // /* MW 8 */
+ 14599 "00110010" // /* MW 7 */
+ 14600 "11110011" // /* MW 6 */
+ 14601 "00000001" // /* MW 5 */
+ 14602 "00000000" // /* MW 4 */
+ 14603 "01010000" // /* MW 3 */
+ 14604 "11001101" // /* MW 2 */
+ 14605 "01101111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 18 first
+ 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14607 "01010110" // /* MW 3 */
+ 14608 "00000110" // /* MW 2 */
+ 14609 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 37 first
+ 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14611 "10111010" // /* MW 3 */
+ 14612 "00011110" // /* MW 2 */
+ 14613 "00000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 73
+ 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14615 "00011010" // /* MW 3 */
+ 14616 "00000110" // /* MW 2 */
+ 14617 "00000011" // /* MW 1 */
+ 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14619 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 110
+ 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14621 "10011010" // /* MW 3 */
+ 14622 "00010110" // /* MW 2 */
+ 14623 "00000011" // /* MW 1 */
+ 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14625 "00000000" // /* MW 1 */
+ 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14627 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14629 "10001000" // /* MW 5 */
+ 14630 "11001000" // /* MW 4 */
+ 14631 "11000000" // /* MW 3 */
+ 14632 "00000111" // /* MW 2 */
+ 14633 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 57
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14635 "01011111" // /* MW 3 */
+ 14636 "11100111" // /* MW 2 */
+ 14637 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19 first
+.src_ref 7 "superkernels.cpp" 611 16
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14639 "00010001" // /* MW 9 */
+ 14640 "00101110" // /* MW 8 */
+ 14641 "00110010" // /* MW 7 */
+ 14642 "11110001" // /* MW 6 */
+ 14643 "00000001" // /* MW 5 */
+ 14644 "00000000" // /* MW 4 */
+ 14645 "00110000" // /* MW 3 */
+ 14646 "11001110" // /* MW 2 */
+ 14647 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 94 first
+ 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14649 "00001111" // /* MW 3 */
+ 14650 "11100001" // /* MW 2 */
+ 14651 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27 first
+ 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14653 "00101111" // /* MW 3 */
+ 14654 "01100011" // /* MW 2 */
+ 14655 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 28 first
+ 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14657 "00001111" // /* MW 3 */
+ 14658 "00100001" // /* MW 2 */
+ 14659 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13
+.src_ref 7 "superkernels.cpp" 611 16 first
+ 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14661 "00010000" // /* MW 11 */
+ 14662 "00110000" // /* MW 10 */
+ 14663 "00110010" // /* MW 9 */
+ 14664 "11110011" // /* MW 8 */
+ 14665 "00000001" // /* MW 7 */
+ 14666 "10000000" // /* MW 6 */
+ 14667 "00110001" // /* MW 5 */
+ 14668 "00000110" // /* MW 4 */
+ 14669 "11110010" // /* MW 3 */
+ 14670 "00101100" // /* MW 2 */
+ 14671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13 first
+ 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14673 "00000000" // /* MW 15 */
+ 14674 "00000000" // /* MW 14 */
+ 14675 "01111000" // /* MW 13 */
+ 14676 "10100101" // /* MW 12 */
+ 14677 "00000001" // /* MW 11 */
+ 14678 "00000000" // /* MW 10 */
+ 14679 "00000000" // /* MW 9 */
+ 14680 "10000000" // /* MW 8 */
+ 14681 "00010001" // /* MW 7 */
+ 14682 "00000110" // /* MW 6 */
+ 14683 "00100110" // /* MW 5 */
+ 14684 "00000000" // /* MW 4 */
+ 14685 "11110000" // /* MW 3 */
+ 14686 "00101100" // /* MW 2 */
+ 14687 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+.src_ref 7 "superkernels.cpp" 614 12
+ 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14689 "10010000" // /* MW 5 */
+ 14690 "11001000" // /* MW 4 */
+ 14691 "11000000" // /* MW 3 */
+ 14692 "00000111" // /* MW 2 */
+ 14693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11
+ 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14695 "00010000" // /* MW 9 */
+ 14696 "00100000" // /* MW 8 */
+ 14697 "00110010" // /* MW 7 */
+ 14698 "11110001" // /* MW 6 */
+ 14699 "00000001" // /* MW 5 */
+ 14700 "00000000" // /* MW 4 */
+ 14701 "11010000" // /* MW 3 */
+ 14702 "11000010" // /* MW 2 */
+ 14703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13
+.src_ref 7 "superkernels.cpp" 616 11 first
+ 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14705 "00010000" // /* MW 9 */
+ 14706 "00100110" // /* MW 8 */
+ 14707 "00110010" // /* MW 7 */
+ 14708 "11110011" // /* MW 6 */
+ 14709 "00000001" // /* MW 5 */
+ 14710 "00000000" // /* MW 4 */
+ 14711 "11010000" // /* MW 3 */
+ 14712 "11000110" // /* MW 2 */
+ 14713 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+ 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14715 "01010110" // /* MW 3 */
+ 14716 "00000110" // /* MW 2 */
+ 14717 "00000110" // /* MW 1 */
+ 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14719 "00000000" // /* MW 1 */
+ 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14721 "00000000" // /* MW 1 */
+ 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14723 "00000000" // /* MW 1 */
+ 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14725 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 618 6 first
+.src_ref 7 "superkernels.cpp" 618 17 first
+ 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */
+ 14727 "00000001" // /* MW 5 */
+ 14728 "01000000" // /* MW 4 */
+ 14729 "11111000" // /* MW 3 */
+ 14730 "00011100" // /* MW 2 */
+ 14731 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14733 "00000001" // /* MW 5 */
+ 14734 "10110000" // /* MW 4 */
+ 14735 "11101001" // /* MW 3 */
+ 14736 "01000000" // /* MW 2 */
+ 14737 "10001100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14739 "00000111" // /* MW 3 */
+ 14740 "10100100" // /* MW 2 */
+ 14741 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14743 "00110001" // /* MW 3 */
+ 14744 "00000110" // /* MW 2 */
+ 14745 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14747 "01010001" // /* MW 3 */
+ 14748 "00000110" // /* MW 2 */
+ 14749 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.delay_slot
+ 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14751 "01110001" // /* MW 3 */
+ 14752 "00000110" // /* MW 2 */
+ 14753 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14755 "00110001" // /* MW 3 */
+ 14756 "11110110" // /* MW 2 */
+ 14757 "00000111" // /* MW 1 */
+ 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14759 "00000000" // /* MW 1 */
+ 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14761 "00000000" // /* MW 1 */
+ 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14763 "00000000" // /* MW 1 */
+ 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14765 "00000000" // /* MW 1 */
+ 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14767 "00000000" // /* MW 1 */
+ 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14771 "10000110" // /* MW 3 */
+ 14772 "01101000" // /* MW 2 */
+ 14773 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14775 "01110110" // /* MW 3 */
+ 14776 "11111111" // /* MW 2 */
+ 14777 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14779 "00110110" // /* MW 3 */
+ 14780 "11111110" // /* MW 2 */
+ 14781 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14783 "01010110" // /* MW 3 */
+ 14784 "11111110" // /* MW 2 */
+ 14785 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14787 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14789 "00110110" // /* MW 3 */
+ 14790 "01000110" // /* MW 2 */
+ 14791 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14793 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14795 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14797 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14799 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14801 "00010010" // /* MW 3 */
+ 14802 "10100011" // /* MW 2 */
+ 14803 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.src_ref 1 "io_buffer_main.h" 395 8
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14805 "11111010" // /* MW 5 */
+ 14806 "11000001" // /* MW 4 */
+ 14807 "00111111" // /* MW 3 */
+ 14808 "11000110" // /* MW 2 */
+ 14809 "11000000" // /* MW 1 */
+ 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14811 "00000000" // /* MW 1 */
+ 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14813 "00000000" // /* MW 1 */
+ 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14815 "00000000" // /* MW 1 */
+ 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14817 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 14819 "01100000" // /* MW 13 */
+ 14820 "00101011" // /* MW 12 */
+ 14821 "00000000" // /* MW 11 */
+ 14822 "10101111" // /* MW 10 */
+ 14823 "00110100" // /* MW 9 */
+ 14824 "00000000" // /* MW 8 */
+ 14825 "00001000" // /* MW 7 */
+ 14826 "01010011" // /* MW 6 */
+ 14827 "00100100" // /* MW 5 */
+ 14828 "00000000" // /* MW 4 */
+ 14829 "11110000" // /* MW 3 */
+ 14830 "00101100" // /* MW 2 */
+ 14831 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14833 "00000000" // /* MW 1 */
+ 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14835 "00000000" // /* MW 1 */
+ 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14837 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 1 "io_buffer_main.h" 125 25
+ 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14839 "00011001" // /* MW 3 */
+ 14840 "11110101" // /* MW 2 */
+ 14841 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14843 "00011001" // /* MW 3 */
+ 14844 "11101000" // /* MW 2 */
+ 14845 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2 first
+.no_stack_arguments
+ 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 14847 "00000001" // /* MW 5 */
+ 14848 "00000000" // /* MW 4 */
+ 14849 "10111000" // /* MW 3 */
+ 14850 "00001000" // /* MW 2 */
+ 14851 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14853 "00000000" // /* MW 5 */
+ 14854 "11001100" // /* MW 4 */
+ 14855 "11000110" // /* MW 3 */
+ 14856 "00000111" // /* MW 2 */
+ 14857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14863 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14865 "00000000" // /* MW 15 */
+ 14866 "00000000" // /* MW 14 */
+ 14867 "01111000" // /* MW 13 */
+ 14868 "10100101" // /* MW 12 */
+ 14869 "00000001" // /* MW 11 */
+ 14870 "00000000" // /* MW 10 */
+ 14871 "00000000" // /* MW 9 */
+ 14872 "00000000" // /* MW 8 */
+ 14873 "10001011" // /* MW 7 */
+ 14874 "10001000" // /* MW 6 */
+ 14875 "00100110" // /* MW 5 */
+ 14876 "00000000" // /* MW 4 */
+ 14877 "11110000" // /* MW 3 */
+ 14878 "00101100" // /* MW 2 */
+ 14879 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+.src_ref 1 "io_buffer_main.h" 218 49
+.return_address
+ 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14881 "00010000" // /* MW 9 */
+ 14882 "00100100" // /* MW 8 */
+ 14883 "10110010" // /* MW 7 */
+ 14884 "11110000" // /* MW 6 */
+ 14885 "00000001" // /* MW 5 */
+ 14886 "00000000" // /* MW 4 */
+ 14887 "00100000" // /* MW 3 */
+ 14888 "01000010" // /* MW 2 */
+ 14889 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6 first
+.src_ref 7 "superkernels.cpp" 623 20
+ 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14891 "00010000" // /* MW 9 */
+ 14892 "00100010" // /* MW 8 */
+ 14893 "10110010" // /* MW 7 */
+ 14894 "11110000" // /* MW 6 */
+ 14895 "00000001" // /* MW 5 */
+ 14896 "00000000" // /* MW 4 */
+ 14897 "11010000" // /* MW 3 */
+ 14898 "11000110" // /* MW 2 */
+ 14899 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 20
+ 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14901 "01010110" // /* MW 3 */
+ 14902 "00000110" // /* MW 2 */
+ 14903 "00000001" // /* MW 1 */
+ 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14905 "00000000" // /* MW 1 */
+ 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14907 "00000000" // /* MW 1 */
+ 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14909 "00000000" // /* MW 1 */
+ 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14911 "00000000" // /* MW 1 */
+ 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14913 "00000000" // /* MW 1 */
+ 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14915 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 17
+ 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14917 "00101000" // /* MW 3 */
+ 14918 "01100011" // /* MW 2 */
+ 14919 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+ 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */
+ 14921 "00000001" // /* MW 5 */
+ 14922 "01000000" // /* MW 4 */
+ 14923 "11010000" // /* MW 3 */
+ 14924 "00011101" // /* MW 2 */
+ 14925 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14935 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14937 "00001000" // /* MW 9 */
+ 14938 "00000011" // /* MW 8 */
+ 14939 "10110100" // /* MW 7 */
+ 14940 "11101000" // /* MW 6 */
+ 14941 "00010111" // /* MW 5 */
+ 14942 "00111111" // /* MW 4 */
+ 14943 "10000000" // /* MW 3 */
+ 14944 "00000010" // /* MW 2 */
+ 14945 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14947 "00010000" // /* MW 9 */
+ 14948 "00101110" // /* MW 8 */
+ 14949 "00110010" // /* MW 7 */
+ 14950 "11110000" // /* MW 6 */
+ 14951 "00000001" // /* MW 5 */
+ 14952 "00000000" // /* MW 4 */
+ 14953 "11010000" // /* MW 3 */
+ 14954 "11101110" // /* MW 2 */
+ 14955 "00111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14957 "01010110" // /* MW 3 */
+ 14958 "11111110" // /* MW 2 */
+ 14959 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14961 "01110110" // /* MW 3 */
+ 14962 "11111110" // /* MW 2 */
+ 14963 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14965 "10010110" // /* MW 3 */
+ 14966 "01010110" // /* MW 2 */
+ 14967 "00000001" // /* MW 1 */
+ 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14969 "00000000" // /* MW 1 */
+ 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14971 "00000000" // /* MW 1 */
+ 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14973 "00000000" // /* MW 1 */
+ 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14975 "00000000" // /* MW 1 */
+ 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14977 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14979 "00100010" // /* MW 3 */
+ 14980 "11100101" // /* MW 2 */
+ 14981 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50
+.src_ref 7 "superkernels.cpp" 630 3
+.src_ref 1 "io_buffer_main.h" 218 20
+ 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14983 "00001010" // /* MW 5 */
+ 14984 "01000000" // /* MW 4 */
+ 14985 "00110000" // /* MW 3 */
+ 14986 "11001010" // /* MW 2 */
+ 14987 "00100000" // /* MW 1 */
+ 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14989 "00000000" // /* MW 1 */
+ 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14991 "00000000" // /* MW 1 */
+ 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14993 "00000000" // /* MW 1 */
+ 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14995 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14997 "00011000" // /* MW 3 */
+ 14998 "00010011" // /* MW 2 */
+ 14999 "00010101" // /* MW 1 */
+ 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15001 "00000000" // /* MW 1 */
+ 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15003 "00000000" // /* MW 1 */
+ 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15005 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52 first
+ 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15007 "01110110" // /* MW 3 */
+ 15008 "00000110" // /* MW 2 */
+ 15009 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34 first
+ 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15011 "01010110" // /* MW 3 */
+ 15012 "00000010" // /* MW 2 */
+ 15013 "00000111" // /* MW 1 */
+ 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15015 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15017 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15019 "00011110" // /* MW 3 */
+ 15020 "01011100" // /* MW 2 */
+ 15021 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15023 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15025 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 32
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15027 "01111000" // /* MW 9 */
+ 15028 "01100000" // /* MW 8 */
+ 15029 "00110001" // /* MW 7 */
+ 15030 "01101100" // /* MW 6 */
+ 15031 "00111000" // /* MW 5 */
+ 15032 "00100111" // /* MW 4 */
+ 15033 "11010000" // /* MW 3 */
+ 15034 "11000110" // /* MW 2 */
+ 15035 "00101001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15037 "00000111" // /* MW 3 */
+ 15038 "10100001" // /* MW 2 */
+ 15039 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */
+ 15041 "00000001" // /* MW 5 */
+ 15042 "01000000" // /* MW 4 */
+ 15043 "10001000" // /* MW 3 */
+ 15044 "00011101" // /* MW 2 */
+ 15045 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15047 "10001011" // /* MW 3 */
+ 15048 "10000000" // /* MW 2 */
+ 15049 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15051 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15055 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.delay_slot
+ 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 15057 "10100000" // /* MW 7 */
+ 15058 "11100010" // /* MW 6 */
+ 15059 "10110100" // /* MW 5 */
+ 15060 "00000000" // /* MW 4 */
+ 15061 "10110000" // /* MW 3 */
+ 15062 "00010011" // /* MW 2 */
+ 15063 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+ 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */
+ 15065 "00000001" // /* MW 5 */
+ 15066 "01000000" // /* MW 4 */
+ 15067 "10011000" // /* MW 3 */
+ 15068 "00011101" // /* MW 2 */
+ 15069 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15079 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8 first
+.no_stack_arguments
+ 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */
+ 15081 "00000001" // /* MW 5 */
+ 15082 "00000000" // /* MW 4 */
+ 15083 "11111000" // /* MW 3 */
+ 15084 "00010101" // /* MW 2 */
+ 15085 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15087 "10000000" // /* MW 5 */
+ 15088 "11001010" // /* MW 4 */
+ 15089 "11000110" // /* MW 3 */
+ 15090 "00000111" // /* MW 2 */
+ 15091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15095 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15097 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15099 "10000001" // /* MW 5 */
+ 15100 "11000001" // /* MW 4 */
+ 15101 "11110100" // /* MW 3 */
+ 15102 "00101100" // /* MW 2 */
+ 15103 "00000000" // /* MW 1 */
+.return_address
+ 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */
+ 15105 "00000000" // /* MW 5 */
+ 15106 "00000000" // /* MW 4 */
+ 15107 "10011000" // /* MW 3 */
+ 15108 "00011101" // /* MW 2 */
+ 15109 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15111 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15119 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.src_ref 7 "superkernels.cpp" 637 8 first
+.no_stack_arguments
+ 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 15121 "00000001" // /* MW 5 */
+ 15122 "00000000" // /* MW 4 */
+ 15123 "01011000" // /* MW 3 */
+ 15124 "00010110" // /* MW 2 */
+ 15125 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15127 "00000000" // /* MW 5 */
+ 15128 "11001011" // /* MW 4 */
+ 15129 "11000110" // /* MW 3 */
+ 15130 "00000111" // /* MW 2 */
+ 15131 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15133 "11000000" // /* MW 3 */
+ 15134 "01100000" // /* MW 2 */
+ 15135 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 15141 "10000001" // /* MW 11 */
+ 15142 "10101101" // /* MW 10 */
+ 15143 "00000000" // /* MW 9 */
+ 15144 "00000000" // /* MW 8 */
+ 15145 "00000000" // /* MW 7 */
+ 15146 "00000000" // /* MW 6 */
+ 15147 "00100000" // /* MW 5 */
+ 15148 "00000000" // /* MW 4 */
+ 15149 "11110000" // /* MW 3 */
+ 15150 "00101100" // /* MW 2 */
+ 15151 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.return_address
+ 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15153 "10011001" // /* MW 3 */
+ 15154 "11110000" // /* MW 2 */
+ 15155 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15157 "00001010" // /* MW 5 */
+ 15158 "01000100" // /* MW 4 */
+ 15159 "00100000" // /* MW 3 */
+ 15160 "10000011" // /* MW 2 */
+ 15161 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 1 "io_buffer_main.h" 324 32 first
+ 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15163 "00010000" // /* MW 9 */
+ 15164 "00100100" // /* MW 8 */
+ 15165 "10110010" // /* MW 7 */
+ 15166 "11110011" // /* MW 6 */
+ 15167 "00000001" // /* MW 5 */
+ 15168 "00000000" // /* MW 4 */
+ 15169 "11010000" // /* MW 3 */
+ 15170 "11000010" // /* MW 2 */
+ 15171 "11101000" // /* MW 1 */
+ 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15173 "00000000" // /* MW 1 */
+ 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15175 "00000000" // /* MW 1 */
+ 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15177 "00000000" // /* MW 1 */
+ 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15179 "00000000" // /* MW 1 */
+ 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15181 "00000000" // /* MW 1 */
+ 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15183 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15185 "00011000" // /* MW 3 */
+ 15186 "00010001" // /* MW 2 */
+ 15187 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15189 "01010110" // /* MW 3 */
+ 15190 "11110110" // /* MW 2 */
+ 15191 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15193 "00010110" // /* MW 3 */
+ 15194 "01010110" // /* MW 2 */
+ 15195 "00000000" // /* MW 1 */
+ 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15197 "00000000" // /* MW 1 */
+ 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15199 "00000000" // /* MW 1 */
+ 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15201 "00000000" // /* MW 1 */
+ 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15203 "00000000" // /* MW 1 */
+ 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15207 "00100001" // /* MW 3 */
+ 15208 "01100101" // /* MW 2 */
+ 15209 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15211 "01010001" // /* MW 3 */
+ 15212 "11110110" // /* MW 2 */
+ 15213 "00001001" // /* MW 1 */
+ 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15215 "00000000" // /* MW 1 */
+ 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15217 "00000000" // /* MW 1 */
+ 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15219 "00000000" // /* MW 1 */
+ 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15221 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15223 "00011000" // /* MW 3 */
+ 15224 "00010001" // /* MW 2 */
+ 15225 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15227 "01010110" // /* MW 3 */
+ 15228 "11100110" // /* MW 2 */
+ 15229 "00000110" // /* MW 1 */
+ 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15231 "00000000" // /* MW 1 */
+ 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15233 "00000000" // /* MW 1 */
+ 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */
+ 15235 "00000000" // /* MW 5 */
+ 15236 "00000000" // /* MW 4 */
+ 15237 "11011000" // /* MW 3 */
+ 15238 "00011101" // /* MW 2 */
+ 15239 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15241 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15243 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 7 "superkernels.cpp" 649 14
+.delay_slot
+ 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15245 "00000001" // /* MW 3 */
+ 15246 "00100000" // /* MW 2 */
+ 15247 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15249 "01000011" // /* MW 5 */
+ 15250 "11000110" // /* MW 4 */
+ 15251 "00111000" // /* MW 3 */
+ 15252 "11000010" // /* MW 2 */
+ 15253 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28 first
+.delay_slot
+ 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15255 "00000000" // /* MW 9 */
+ 15256 "00000000" // /* MW 8 */
+ 15257 "00000000" // /* MW 7 */
+ 15258 "10000000" // /* MW 6 */
+ 15259 "00110001" // /* MW 5 */
+ 15260 "11100110" // /* MW 4 */
+ 15261 "11110110" // /* MW 3 */
+ 15262 "00101100" // /* MW 2 */
+ 15263 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+.src_ref 7 "superkernels.cpp" 649 14
+ 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 15265 "00000000" // /* MW 15 */
+ 15266 "00000000" // /* MW 14 */
+ 15267 "01111000" // /* MW 13 */
+ 15268 "10100101" // /* MW 12 */
+ 15269 "00000001" // /* MW 11 */
+ 15270 "00001000" // /* MW 10 */
+ 15271 "00000000" // /* MW 9 */
+ 15272 "00000001" // /* MW 8 */
+ 15273 "01011011" // /* MW 7 */
+ 15274 "00000001" // /* MW 6 */
+ 15275 "00100000" // /* MW 5 */
+ 15276 "00000000" // /* MW 4 */
+ 15277 "11110000" // /* MW 3 */
+ 15278 "00101100" // /* MW 2 */
+ 15279 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+.src_ref 7 "superkernels.cpp" 648 19
+.src_ref 7 "superkernels.cpp" 651
+ 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15281 "00010000" // /* MW 9 */
+ 15282 "00110000" // /* MW 8 */
+ 15283 "10110010" // /* MW 7 */
+ 15284 "11110011" // /* MW 6 */
+ 15285 "00000001" // /* MW 5 */
+ 15286 "00000000" // /* MW 4 */
+ 15287 "00100000" // /* MW 3 */
+ 15288 "10000111" // /* MW 2 */
+ 15289 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+.src_ref 7 "superkernels.cpp" 648 19 first
+.src_ref 7 "superkernels.cpp" 649 14
+ 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15291 "00010000" // /* MW 9 */
+ 15292 "00100000" // /* MW 8 */
+ 15293 "00110010" // /* MW 7 */
+ 15294 "11110011" // /* MW 6 */
+ 15295 "00000001" // /* MW 5 */
+ 15296 "00000000" // /* MW 4 */
+ 15297 "11010000" // /* MW 3 */
+ 15298 "11001010" // /* MW 2 */
+ 15299 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15301 "00110110" // /* MW 3 */
+ 15302 "00000110" // /* MW 2 */
+ 15303 "00000110" // /* MW 1 */
+ 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15305 "00000000" // /* MW 1 */
+ 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15307 "00000000" // /* MW 1 */
+ 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15309 "00000000" // /* MW 1 */
+ 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15311 "00000000" // /* MW 1 */
+ 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15313 "00000000" // /* MW 1 */
+ 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15315 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 16
+ 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15317 "00101000" // /* MW 3 */
+ 15318 "01100011" // /* MW 2 */
+ 15319 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */
+ 15321 "00000001" // /* MW 5 */
+ 15322 "01000000" // /* MW 4 */
+ 15323 "11111000" // /* MW 3 */
+ 15324 "00011101" // /* MW 2 */
+ 15325 "10001000" // /* MW 1 */
+.delay_slot
+ 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15327 "10011001" // /* MW 3 */
+ 15328 "11111011" // /* MW 2 */
+ 15329 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15331 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15333 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15335 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15337 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 649 14 first
+ 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15339 "00100011" // /* MW 5 */
+ 15340 "00001100" // /* MW 4 */
+ 15341 "11111100" // /* MW 3 */
+ 15342 "00101100" // /* MW 2 */
+ 15343 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15345 "00011001" // /* MW 3 */
+ 15346 "11111111" // /* MW 2 */
+ 15347 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651 first
+ 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15349 "00000000" // /* MW 3 */
+ 15350 "00101000" // /* MW 2 */
+ 15351 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651
+.delay_slot
+ 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15353 "00000001" // /* MW 5 */
+ 15354 "00000000" // /* MW 4 */
+ 15355 "00000000" // /* MW 3 */
+ 15356 "11111000" // /* MW 2 */
+ 15357 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+ 15365 "00000000" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 21 first
+.src_ref 0 "0_0_reloadable5.cc" 23 79
+.function_start
+ 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15377 "11000000" // /* MW 3 */
+ 15378 "01100000" // /* MW 2 */
+ 15379 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 23 79 first
+ 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15381 "00011110" // /* MW 3 */
+ 15382 "00011100" // /* MW 2 */
+ 15383 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 24 79 first
+ 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15385 "10011110" // /* MW 3 */
+ 15386 "00101100" // /* MW 2 */
+ 15387 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 26 81 first
+ 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15389 "10011110" // /* MW 3 */
+ 15390 "11110101" // /* MW 2 */
+ 15391 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 25 47 first
+ 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15393 "00011110" // /* MW 3 */
+ 15394 "00000101" // /* MW 2 */
+ 15395 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 22 4 first
+.tail_call
+ 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 15397 "00000000" // /* MW 5 */
+ 15398 "00000000" // /* MW 4 */
+ 15399 "01110000" // /* MW 3 */
+ 15400 "00001101" // /* MW 2 */
+ 15401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 15411 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 30 first
+.src_ref 0 "0_0_reloadable5.cc" 32 79
+.function_start
+ 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15425 "11000000" // /* MW 3 */
+ 15426 "01100000" // /* MW 2 */
+ 15427 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 32 79 first
+ 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15429 "00011110" // /* MW 3 */
+ 15430 "00101100" // /* MW 2 */
+ 15431 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 34 81 first
+ 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15433 "00011110" // /* MW 3 */
+ 15434 "11110101" // /* MW 2 */
+ 15435 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 33 47 first
+ 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15437 "10011110" // /* MW 3 */
+ 15438 "00000100" // /* MW 2 */
+ 15439 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 31 4 first
+.tail_call
+ 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 15441 "00000000" // /* MW 5 */
+ 15442 "00000000" // /* MW 4 */
+ 15443 "00011000" // /* MW 3 */
+ 15444 "00010000" // /* MW 2 */
+ 15445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 15455 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 38 first
+.src_ref 0 "0_0_reloadable5.cc" 40 79
+.function_start
+ 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15457 "11000000" // /* MW 3 */
+ 15458 "01100000" // /* MW 2 */
+ 15459 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 40 79 first
+ 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15461 "00011110" // /* MW 3 */
+ 15462 "00101100" // /* MW 2 */
+ 15463 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 42 81 first
+ 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15465 "00011110" // /* MW 3 */
+ 15466 "11110101" // /* MW 2 */
+ 15467 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 41 47 first
+ 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15469 "10011110" // /* MW 3 */
+ 15470 "00000100" // /* MW 2 */
+ 15471 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 39 4 first
+.tail_call
+ 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 15473 "00000000" // /* MW 5 */
+ 15474 "00000000" // /* MW 4 */
+ 15475 "11001000" // /* MW 3 */
+ 15476 "00010001" // /* MW 2 */
+ 15477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 15487 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 46 first
+.src_ref 0 "0_0_reloadable5.cc" 48 79
+.function_start
+ 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15489 "11000000" // /* MW 3 */
+ 15490 "01100000" // /* MW 2 */
+ 15491 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 48 79 first
+ 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15493 "00011110" // /* MW 3 */
+ 15494 "00101100" // /* MW 2 */
+ 15495 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 50 81 first
+ 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15497 "00011110" // /* MW 3 */
+ 15498 "11110101" // /* MW 2 */
+ 15499 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 49 47 first
+ 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15501 "10011110" // /* MW 3 */
+ 15502 "00000100" // /* MW 2 */
+ 15503 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 47 4 first
+.tail_call
+ 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 15505 "00000000" // /* MW 5 */
+ 15506 "00000000" // /* MW 4 */
+ 15507 "10001000" // /* MW 3 */
+ 15508 "00010100" // /* MW 2 */
+ 15509 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 15519 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 54 first
+.src_ref 0 "0_0_reloadable5.cc" 56 79
+.function_start
+ 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15521 "11000000" // /* MW 3 */
+ 15522 "01100000" // /* MW 2 */
+ 15523 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 56 79 first
+ 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15525 "00011110" // /* MW 3 */
+ 15526 "00111100" // /* MW 2 */
+ 15527 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 57 47 first
+ 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15529 "10011110" // /* MW 3 */
+ 15530 "11101100" // /* MW 2 */
+ 15531 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 59 81 first
+ 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15533 "10011110" // /* MW 3 */
+ 15534 "00010101" // /* MW 2 */
+ 15535 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 58 80 first
+ 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15537 "00011110" // /* MW 3 */
+ 15538 "00000101" // /* MW 2 */
+ 15539 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 55 4 first
+.tail_call
+ 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */
+ 15541 "00000000" // /* MW 5 */
+ 15542 "00000000" // /* MW 4 */
+ 15543 "11110000" // /* MW 3 */
+ 15544 "00010110" // /* MW 2 */
+ 15545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15551 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15553 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 15555 "00000000" // /* MW 1 */
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function _b924_wrapper _Z13_b924_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 63 first
+.src_ref 0 "0_0_reloadable5.cc" 65 79
+.function_start
+ 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15569 "11000000" // /* MW 3 */
+ 15570 "01100000" // /* MW 2 */
+ 15571 "00011011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 65 79 first
+ 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15573 "00011110" // /* MW 3 */
+ 15574 "00011100" // /* MW 2 */
+ 15575 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 66 79 first
+ 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15577 "10011110" // /* MW 3 */
+ 15578 "00011100" // /* MW 2 */
+ 15579 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 67 80 first
+ 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15581 "00011110" // /* MW 3 */
+ 15582 "00101101" // /* MW 2 */
+ 15583 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 69 81 first
+ 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15585 "00011110" // /* MW 3 */
+ 15586 "11110110" // /* MW 2 */
+ 15587 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 68 47 first
+ 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15589 "10011110" // /* MW 3 */
+ 15590 "00000101" // /* MW 2 */
+ 15591 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 64 4 first
+.tail_call
+ 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */
+ 15593 "00000000" // /* MW 5 */
+ 15594 "00000000" // /* MW 4 */
+ 15595 "11010000" // /* MW 3 */
+ 15596 "00011011" // /* MW 2 */
+ 15597 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+ 15607 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 73 first
+.src_ref 0 "0_0_reloadable5.cc" 75 79
+.function_start
+ 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15617 "11000000" // /* MW 3 */
+ 15618 "01100000" // /* MW 2 */
+ 15619 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 75 79 first
+ 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15621 "00011110" // /* MW 3 */
+ 15622 "00011100" // /* MW 2 */
+ 15623 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 76 79 first
+ 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15625 "10011110" // /* MW 3 */
+ 15626 "00101100" // /* MW 2 */
+ 15627 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 78 81 first
+ 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15629 "10011110" // /* MW 3 */
+ 15630 "11110101" // /* MW 2 */
+ 15631 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 77 47 first
+ 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15633 "00011110" // /* MW 3 */
+ 15634 "00000101" // /* MW 2 */
+ 15635 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 74 4 first
+.tail_call
+ 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */
+ 15637 "00000000" // /* MW 5 */
+ 15638 "00000000" // /* MW 4 */
+ 15639 "11100000" // /* MW 3 */
+ 15640 "00011010" // /* MW 2 */
+ 15641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 15651 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15665 "01000001" // /* MW 5 */
+ 15666 "10100000" // /* MW 4 */
+ 15667 "00101111" // /* MW 3 */
+ 15668 "11000000" // /* MW 2 */
+ 15669 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15671 "00011100" // /* MW 3 */
+ 15672 "11000110" // /* MW 2 */
+ 15673 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15675 "00011100" // /* MW 3 */
+ 15676 "11000110" // /* MW 2 */
+ 15677 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15679 "00011100" // /* MW 3 */
+ 15680 "11000110" // /* MW 2 */
+ 15681 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15683 "00011100" // /* MW 3 */
+ 15684 "11000110" // /* MW 2 */
+ 15685 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15687 "00011100" // /* MW 3 */
+ 15688 "11000110" // /* MW 2 */
+ 15689 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15691 "00011100" // /* MW 3 */
+ 15692 "11000110" // /* MW 2 */
+ 15693 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15695 "00011100" // /* MW 3 */
+ 15696 "11000110" // /* MW 2 */
+ 15697 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15699 "00011100" // /* MW 3 */
+ 15700 "11000110" // /* MW 2 */
+ 15701 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15703 "00011100" // /* MW 3 */
+ 15704 "11000110" // /* MW 2 */
+ 15705 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15707 "00011100" // /* MW 3 */
+ 15708 "11000110" // /* MW 2 */
+ 15709 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15711 "00011100" // /* MW 3 */
+ 15712 "11000110" // /* MW 2 */
+ 15713 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15715 "00011100" // /* MW 3 */
+ 15716 "11000110" // /* MW 2 */
+ 15717 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15719 "00011100" // /* MW 3 */
+ 15720 "11000110" // /* MW 2 */
+ 15721 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15723 "00011100" // /* MW 3 */
+ 15724 "11000110" // /* MW 2 */
+ 15725 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15727 "00011100" // /* MW 3 */
+ 15728 "11000110" // /* MW 2 */
+ 15729 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15731 "00011100" // /* MW 3 */
+ 15732 "11000110" // /* MW 2 */
+ 15733 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15735 "00011100" // /* MW 3 */
+ 15736 "11000110" // /* MW 2 */
+ 15737 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15739 "00011100" // /* MW 3 */
+ 15740 "11000110" // /* MW 2 */
+ 15741 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15743 "00011100" // /* MW 3 */
+ 15744 "11000110" // /* MW 2 */
+ 15745 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15747 "00011100" // /* MW 3 */
+ 15748 "11000110" // /* MW 2 */
+ 15749 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15751 "00011100" // /* MW 3 */
+ 15752 "11000110" // /* MW 2 */
+ 15753 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15755 "00011100" // /* MW 3 */
+ 15756 "11000110" // /* MW 2 */
+ 15757 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15759 "00011100" // /* MW 3 */
+ 15760 "11000110" // /* MW 2 */
+ 15761 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15763 "00011100" // /* MW 3 */
+ 15764 "11000110" // /* MW 2 */
+ 15765 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15767 "00011100" // /* MW 3 */
+ 15768 "11000110" // /* MW 2 */
+ 15769 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15771 "00011100" // /* MW 3 */
+ 15772 "11000110" // /* MW 2 */
+ 15773 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15775 "00011100" // /* MW 3 */
+ 15776 "11000110" // /* MW 2 */
+ 15777 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15779 "00011100" // /* MW 3 */
+ 15780 "11000110" // /* MW 2 */
+ 15781 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15783 "00000000" // /* MW 3 */
+ 15784 "00101000" // /* MW 2 */
+ 15785 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15787 "00011100" // /* MW 3 */
+ 15788 "11000110" // /* MW 2 */
+ 15789 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15791 "00011100" // /* MW 3 */
+ 15792 "11000110" // /* MW 2 */
+ 15793 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15795 "00011100" // /* MW 3 */
+ 15796 "11000110" // /* MW 2 */
+ 15797 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15799 "00011100" // /* MW 3 */
+ 15800 "11000110" // /* MW 2 */
+ 15801 "00010000" // /* MW 1 */
+.delay_slot
+ 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15803 "10100000" // /* MW 3 */
+ 15804 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 15805 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eaa1644fb33f11a55e17a2e7f02cedec89cc05c6
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/Release/3_3_reloadable11.txt
@@ -0,0 +1,5263 @@
+Contents of the .debug_line section:
+
+sigmoid_carf_templated_lut.h:
+File name Line number Starting address View Stmt
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 205 0x2580 x
+elementwise_binary_shared.h 211 0x2580 1 x
+elementwise_binary_shared.h 216 0x2580 2
+elementwise_binary_shared.h 216 0x2580 3
+elementwise_binary_shared.h 216 0x258a
+elementwise_binary_shared.h 211 0x2598 x
+elementwise_binary_shared.h 212 0x259c x
+elementwise_binary_shared.h 212 0x25ac
+elementwise_binary_shared.h 213 0x25b0 x
+elementwise_binary_shared.h 213 0x25c0
+elementwise_binary_shared.h 214 0x25c4 x
+elementwise_binary_shared.h 214 0x25d4
+elementwise_binary_shared.h 216 0x25d8 x
+elementwise_binary_shared.h 217 0x25dc x
+elementwise_binary_shared.h 216 0x25e0
+elementwise_binary_shared.h 216 0x25e6 x
+elementwise_binary_shared.h 216 0x25ea
+elementwise_binary_shared.h 216 0x25ee
+elementwise_binary_shared.h 107 0x2650 x
+elementwise_binary_shared.h 119 0x2650 1
+elementwise_binary_shared.h 126 0x2650 2
+elementwise_binary_shared.h 131 0x2650 3
+elementwise_binary_shared.h 119 0x2654 x
+elementwise_binary_shared.h 122 0x2658 x
+elementwise_binary_shared.h 124 0x265c x
+elementwise_binary_shared.h 124 0x2668
+elementwise_binary_shared.h 107 0x266c
+elementwise_binary_shared.h 124 0x2672
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2676
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 124 0x2676 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 150 0x268c
+elementwise_binary_shared.h 119 0x2692 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2696 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x2696 1
+elementwise_binary_shared.h 126 0x2696 2
+elementwise_binary_shared.h 131 0x2696 3
+elementwise_binary_shared.h 131 0x2696 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26a0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x26a0 1 x
+elementwise_binary_shared.h 131 0x26a0 2 x
+elementwise_binary_shared.h 171 0x26a0 3
+elementwise_binary_shared.h 131 0x26b2
+elementwise_binary_shared.h 131 0x26b2 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26b8 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x26b8 2
+elementwise_binary_shared.h 166 0x26bc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26c8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26c8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x26da x
+vector.hpp 1139 0x26e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26e4
+vector.hpp 1159 0x26e4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x26e4 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26f6
+vector.hpp 1139 0x26f6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26f6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26f6 3
+elementwise_binary_shared.h 173 0x26f6 4
+elementwise_binary_shared.h 150 0x2710
+elementwise_binary_shared.h 150 0x2714 x
+elementwise_binary_shared.h 150 0x2718
+elementwise_binary_shared.h 150 0x271e
+elementwise_binary_shared.h 150 0x2724
+elementwise_binary_shared.h 166 0x2724 1
+elementwise_binary_shared.h 150 0x2730
+elementwise_binary_shared.h 150 0x2740
+elementwise_binary_shared.h 150 0x2740 1
+elementwise_binary_shared.h 150 0x2740 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x274a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x274a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x274e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2752
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x2752 1
+elementwise_binary_shared.h 150 0x2758
+elementwise_binary_shared.h 150 0x275c
+elementwise_binary_shared.h 150 0x275c 1
+elementwise_binary_shared.h 150 0x2762
+elementwise_binary_shared.h 150 0x2766
+elementwise_binary_shared.h 150 0x276c
+elementwise_binary_shared.h 150 0x2774
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2784 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x278a x
+vector.hpp 1139 0x2790 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x2790 1 x
+elementwise_binary_shared.h 166 0x2790 2 x
+elementwise_binary_shared.h 169 0x2790 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x279c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x279c 1
+elementwise_binary_shared.h 166 0x279c 2
+elementwise_binary_shared.h 171 0x279c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27a8 x
+vector.hpp 1139 0x27a8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27a8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27a8 3 x
+elementwise_binary_shared.h 173 0x27a8 4 x
+elementwise_binary_shared.h 177 0x27a8 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x27b0 1 x
+elementwise_binary_shared.h 171 0x27b0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27b8 2 x
+elementwise_binary_shared.h 166 0x27be x
+elementwise_binary_shared.h 166 0x27c2
+elementwise_binary_shared.h 177 0x27c2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27ca x
+vector.hpp 1139 0x27ca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27ca 2 x
+elementwise_binary_shared.h 171 0x27ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27d0
+vector.hpp 1159 0x27d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27d0 2 x
+accum.hpp 1110 0x27d0 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27d0 4 x
+elementwise_binary_shared.h 185 0x27d0 5
+elementwise_binary_shared.h 177 0x27f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2800 x
+vector.hpp 1139 0x2800 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x2800 2 x
+elementwise_binary_shared.h 171 0x2800 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2810
+vector.hpp 1159 0x2810 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x2810 2 x
+accum.hpp 1110 0x2810 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x2810 4 x
+elementwise_binary_shared.h 185 0x2810 5 x
+elementwise_binary_shared.h 177 0x2830 x
+elementwise_binary_shared.h 187 0x2840 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2846 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2846 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2846 2 x
+elementwise_binary_shared.h 177 0x284c x
+elementwise_binary_shared.h 187 0x2852 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2856 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2856 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2856 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2860
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2860 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2860 2
+elementwise_binary_shared.h 205 0x2b00 x
+elementwise_binary_shared.h 211 0x2b00 1 x
+elementwise_binary_shared.h 216 0x2b00 2
+elementwise_binary_shared.h 216 0x2b00 3
+elementwise_binary_shared.h 216 0x2b0a
+elementwise_binary_shared.h 211 0x2b18 x
+elementwise_binary_shared.h 212 0x2b1c x
+elementwise_binary_shared.h 212 0x2b2c
+elementwise_binary_shared.h 213 0x2b30 x
+elementwise_binary_shared.h 213 0x2b40
+elementwise_binary_shared.h 214 0x2b44 x
+elementwise_binary_shared.h 214 0x2b54
+elementwise_binary_shared.h 216 0x2b58 x
+elementwise_binary_shared.h 217 0x2b5c x
+elementwise_binary_shared.h 216 0x2b60
+elementwise_binary_shared.h 216 0x2b66 x
+elementwise_binary_shared.h 216 0x2b6a
+elementwise_binary_shared.h 216 0x2b6e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 199 0x32e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32e4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x32e4 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32ea
+io_buffer_main.h 125 0x32ea 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x32f0 x
+conv2d_dw_bf16.h 221 0x32f4 x
+conv2d_dw_bf16.h 221 0x32f8
+conv2d_dw_bf16.h 221 0x32fc
+conv2d_dw_bf16.h 221 0x3300
+conv2d_dw_bf16.h 221 0x3304
+conv2d_dw_bf16.h 222 0x3308 x
+conv2d_dw_bf16.h 222 0x330c
+conv2d_dw_bf16.h 222 0x3310
+conv2d_dw_bf16.h 222 0x3314
+conv2d_dw_bf16.h 222 0x3318
+conv2d_dw_bf16.h 223 0x331c x
+conv2d_dw_bf16.h 223 0x3320
+conv2d_dw_bf16.h 223 0x3324
+conv2d_dw_bf16.h 223 0x3328
+conv2d_dw_bf16.h 223 0x332c
+conv2d_dw_bf16.h 224 0x3330 x
+conv2d_dw_bf16.h 224 0x3334
+conv2d_dw_bf16.h 224 0x3338
+conv2d_dw_bf16.h 244 0x3338 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3342
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3342 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x3342 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3342 3 x
+conv2d_dw_bf16.h 225 0x3348
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x334c
+aie_core.h 81 0x334c 1
+aie_core.h 100 0x334c 2
+aie_core.h 100 0x334c 3
+aie_core.h 100 0x334c 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x334c 5
+vector.hpp 1139 0x334c 6
+vector.hpp 1139 0x334c 7 x
+vector.hpp 1139 0x334c 8 x
+vector.hpp 1159 0x334c 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x334c 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x334c 11
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3358
+aie_core.h 81 0x3358 1
+aie_core.h 100 0x3358 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3358 3
+vector.hpp 1139 0x3358 4
+vector.hpp 1159 0x3358 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3358 6 x
+conv2d_dw_bf16.h 225 0x3358 7 x
+conv2d_dw_bf16.h 244 0x3358 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3366
+aie_core.h 100 0x3366 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3366 2
+vector.hpp 1159 0x3366 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3366 4
+conv2d_dw_bf16.h 225 0x3366 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3370
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3370 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3370 2
+conv2d_dw_bf16.h 225 0x3370 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x337a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x337a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x337a 2
+conv2d_dw_bf16.h 244 0x337a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3384
+shuffle.hpp 142 0x3384 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3384 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x338a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x338a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x338a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3396
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3396 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3396 2 x
+conv2d_dw_bf16.h 250 0x3396 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33a2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33a2 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x33a8
+conv2d_dw_bf16.h 244 0x33ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33b6
+shuffle.hpp 142 0x33b6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x33b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33c0
+shuffle.hpp 142 0x33c0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x33c0 2
+conv2d_dw_bf16.h 271 0x33c0 3
+conv2d_dw_bf16.h 272 0x33c0 4
+conv2d_dw_bf16.h 273 0x33c0 5
+conv2d_dw_bf16.h 274 0x33c0 6
+conv2d_dw_bf16.h 275 0x33c0 7
+conv2d_dw_bf16.h 276 0x33c0 8
+conv2d_dw_bf16.h 277 0x33c0 9
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33d0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x33d0 1
+accum.hpp 1110 0x33d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 265 0x33d0 3 x
+conv2d_dw_bf16.h 270 0x33d0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33e0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x33e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x33e0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x33e0 3 x
+conv2d_dw_bf16.h 274 0x33e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x33f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33f0 1 x
+vector.hpp 1139 0x33f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33f0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x33fa 1 x
+conv2d_dw_bf16.h 271 0x33fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3404 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3404 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3404 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3404 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x340e
+shuffle.hpp 142 0x3412
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3412 1 x
+conv2d_dw_bf16.h 267 0x341a x
+conv2d_dw_bf16.h 276 0x341a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3422 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3426 x
+conv2d_dw_bf16.h 273 0x3426 1 x
+conv2d_dw_bf16.h 265 0x342e x
+conv2d_dw_bf16.h 277 0x342e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3436 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x3440 x
+conv2d_dw_bf16.h 274 0x3450 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3460 x
+aie_core.h 100 0x3460 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3460 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x346a x
+conv2d_dw_bf16.h 271 0x346a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3472 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3472 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x347a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x347e x
+conv2d_dw_bf16.h 272 0x347e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3486 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3486 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3490 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3490 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3490 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x3496 x
+conv2d_dw_bf16.h 273 0x3496 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x34a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x34a0 2
+conv2d_dw_bf16.h 277 0x34a0 3 x
+conv2d_dw_bf16.h 250 0x34ac x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34b0 x
+vector.hpp 1139 0x34b4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x34b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34b8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x34bc x
+accum.hpp 1110 0x34c0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x34c4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x34c8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x34cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x34cc 2 x
+conv2d_dw_bf16.h 268 0x34d4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x34d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34d8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x34d8 2
+conv2d_dw_bf16.h 265 0x34e0 x
+conv2d_dw_bf16.h 270 0x34e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x34e8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 274 0x34e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x34f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34f0 1 x
+vector.hpp 1139 0x34f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x34f0 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x34fa 1 x
+conv2d_dw_bf16.h 271 0x34fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3504 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3504 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3504 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3504 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x350e
+shuffle.hpp 142 0x3512
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3512 1 x
+conv2d_dw_bf16.h 267 0x351a x
+conv2d_dw_bf16.h 276 0x351a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3522 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3526 x
+conv2d_dw_bf16.h 273 0x3526 1 x
+conv2d_dw_bf16.h 265 0x352e x
+conv2d_dw_bf16.h 277 0x352e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3536 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x353c x
+conv2d_dw_bf16.h 274 0x3540 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3550 x
+aie_core.h 100 0x3550 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3550 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3550 3 x
+conv2d_dw_bf16.h 266 0x355c x
+conv2d_dw_bf16.h 271 0x355c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3564 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3564 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x356c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x3570 x
+conv2d_dw_bf16.h 272 0x3570 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3578 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3578 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3580
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 273 0x3584 x
+conv2d_dw_bf16.h 277 0x3588 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x3594 x
+accum.hpp 1110 0x3598
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 290 0x3598 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x359e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x35a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x35a6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x35aa x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x35aa 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x35ae x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x35ae 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 432 0xac0 x
+conv2d_bf16_params.h 438 0xac0 1 x
+conv2d_bf16_params.h 452 0xac0 2
+conv2d_bf16_params.h 453 0xac0 3
+conv2d_bf16_params.h 458 0xac0 4
+conv2d_bf16_params.h 470 0xac0 5
+conv2d_bf16_params.h 438 0xaca
+conv2d_bf16_params.h 438 0xaca 1 x
+conv2d_bf16_params.h 452 0xaca 2
+conv2d_bf16_params.h 462 0xaca 3
+conv2d_bf16_params.h 432 0xad4
+conv2d_bf16_params.h 444 0xad4 1
+conv2d_bf16_params.h 453 0xade
+conv2d_bf16_params.h 458 0xade 1
+conv2d_bf16_params.h 458 0xade 2
+conv2d_bf16_params.h 444 0xaea
+conv2d_bf16_params.h 470 0xaea 1
+conv2d_bf16_params.h 477 0xaea 2
+conv2d_bf16_params.h 557 0xaea 3
+conv2d_bf16_params.h 452 0xaf6
+conv2d_bf16_params.h 458 0xaf6 1
+conv2d_bf16_params.h 462 0xaf6 2
+conv2d_bf16_params.h 438 0xafe
+conv2d_bf16_params.h 438 0xb02
+conv2d_bf16_params.h 438 0xb06
+conv2d_bf16_params.h 438 0xb0a
+conv2d_bf16_params.h 438 0xb18
+conv2d_bf16_params.h 438 0xb1c
+conv2d_bf16_params.h 438 0xb20
+conv2d_bf16_params.h 438 0xb24
+conv2d_bf16_params.h 438 0xb32
+conv2d_bf16_params.h 438 0xb36
+conv2d_bf16_params.h 438 0xb3a
+conv2d_bf16_params.h 438 0xb3e
+conv2d_bf16_params.h 438 0xb4c
+conv2d_bf16_params.h 438 0xb50
+conv2d_bf16_params.h 444 0xb54 x
+conv2d_bf16_params.h 447 0xb58 x
+conv2d_bf16_params.h 448 0xb5c x
+conv2d_bf16_params.h 452 0xb60 x
+conv2d_bf16_params.h 453 0xb64 x
+conv2d_bf16_params.h 458 0xb68 x
+conv2d_bf16_params.h 444 0xb6e x
+conv2d_bf16_params.h 458 0xb72 x
+conv2d_bf16_params.h 462 0xb72 1 x
+conv2d_bf16_params.h 462 0xb78
+conv2d_bf16_params.h 452 0xb7c x
+conv2d_bf16_params.h 452 0xb80
+conv2d_bf16_params.h 462 0xb80 1 x
+conv2d_bf16_params.h 557 0xb80 2
+conv2d_bf16_params.h 462 0xb86
+conv2d_bf16_params.h 458 0xb8a x
+conv2d_bf16_params.h 458 0xb8e
+conv2d_bf16_params.h 458 0xb92
+conv2d_bf16_params.h 477 0xb92 1
+conv2d_bf16_params.h 557 0xb92 2 x
+conv2d_bf16_params.h 458 0xb98 x
+conv2d_bf16_params.h 458 0xb9e
+conv2d_bf16_params.h 477 0xb9e 1 x
+conv2d_bf16_params.h 458 0xba4 x
+conv2d_bf16_params.h 444 0xba8 x
+conv2d_bf16_params.h 462 0xbac x
+conv2d_bf16_params.h 470 0xbb0 x
+conv2d_bf16_params.h 470 0xbb4
+conv2d_bf16_params.h 477 0xbb4 1 x
+conv2d_bf16_params.h 477 0xbb8
+conv2d_bf16_params.h 491 0xbc8
+conv2d_bf16_params.h 492 0xbc8 1
+conv2d_bf16_params.h 495 0xbc8 2
+conv2d_bf16_params.h 502 0xbc8 3
+conv2d_bf16_params.h 533 0xbc8 4
+conv2d_bf16_params.h 539 0xbc8 5
+conv2d_bf16_params.h 557 0xbc8 6
+conv2d_bf16_params.h 621 0xbc8 7
+conv2d_bf16_params.h 645 0xbc8 8
+conv2d_bf16_params.h 709 0xbc8 9
+conv2d_bf16_params.h 477 0xbd2
+conv2d_bf16_params.h 481 0xbd2 1
+conv2d_bf16_params.h 500 0xbd2 2
+conv2d_bf16_params.h 506 0xbd2 3
+conv2d_bf16_params.h 507 0xbd2 4
+conv2d_bf16_params.h 524 0xbd2 5
+conv2d_bf16_params.h 539 0xbd2 6
+conv2d_bf16_params.h 655 0xbd2 7
+conv2d_bf16_params.h 477 0xbdc
+conv2d_bf16_params.h 504 0xbdc 1
+conv2d_bf16_params.h 510 0xbdc 2
+conv2d_bf16_params.h 520 0xbdc 3
+conv2d_bf16_params.h 700 0xbdc 4
+conv2d_bf16_params.h 477 0xbe2
+conv2d_bf16_params.h 539 0xbe2 1
+conv2d_bf16_params.h 578 0xbe2 2
+conv2d_bf16_params.h 642 0xbe2 3
+conv2d_bf16_params.h 529 0xbe6
+conv2d_bf16_params.h 642 0xbe6 1
+conv2d_bf16_params.h 642 0xbe6 2
+conv2d_bf16_params.h 655 0xbea
+conv2d_bf16_params.h 453 0xbf0
+conv2d_bf16_params.h 453 0xbf0 1
+conv2d_bf16_params.h 477 0xbf0 2
+conv2d_bf16_params.h 504 0xbf0 3
+conv2d_bf16_params.h 655 0xbf0 4
+conv2d_bf16_params.h 453 0xbfc x
+conv2d_bf16_params.h 477 0xbfc 1
+conv2d_bf16_params.h 481 0xbfc 2
+conv2d_bf16_params.h 500 0xbfc 3
+conv2d_bf16_params.h 506 0xbfc 4
+conv2d_bf16_params.h 507 0xbfc 5
+conv2d_bf16_params.h 524 0xbfc 6
+conv2d_bf16_params.h 539 0xbfc 7
+conv2d_bf16_params.h 491 0xc06
+conv2d_bf16_params.h 492 0xc06 1
+conv2d_bf16_params.h 495 0xc06 2
+conv2d_bf16_params.h 502 0xc06 3
+conv2d_bf16_params.h 510 0xc06 4
+conv2d_bf16_params.h 520 0xc06 5
+conv2d_bf16_params.h 533 0xc06 6
+conv2d_bf16_params.h 539 0xc06 7
+conv2d_bf16_params.h 557 0xc06 8
+conv2d_bf16_params.h 621 0xc06 9
+conv2d_bf16_params.h 645 0xc06 10
+conv2d_bf16_params.h 655 0xc06 11
+conv2d_bf16_params.h 700 0xc06 12
+conv2d_bf16_params.h 709 0xc06 13
+conv2d_bf16_params.h 477 0xc10
+conv2d_bf16_params.h 529 0xc10 1
+conv2d_bf16_params.h 539 0xc10 2
+conv2d_bf16_params.h 578 0xc10 3
+conv2d_bf16_params.h 642 0xc10 4
+conv2d_bf16_params.h 642 0xc10 5
+conv2d_bf16_params.h 642 0xc10 6
+conv2d_bf16_params.h 477 0xc20 x
+conv2d_bf16_params.h 495 0xc20 1 x
+conv2d_bf16_params.h 495 0xc20 2
+conv2d_bf16_params.h 682 0xc20 3
+conv2d_bf16_params.h 477 0xc2a
+conv2d_bf16_params.h 481 0xc2a 1 x
+conv2d_bf16_params.h 495 0xc2a 2
+conv2d_bf16_params.h 495 0xc2a 3
+conv2d_bf16_params.h 477 0xc34 x
+conv2d_bf16_params.h 496 0xc34 1
+conv2d_bf16_params.h 504 0xc34 2
+conv2d_bf16_params.h 539 0xc34 3
+conv2d_bf16_params.h 578 0xc34 4
+conv2d_bf16_params.h 496 0xc3e
+conv2d_bf16_params.h 499 0xc3e 1
+conv2d_bf16_params.h 504 0xc3e 2 x
+conv2d_bf16_params.h 509 0xc3e 3
+conv2d_bf16_params.h 519 0xc3e 4
+conv2d_bf16_params.h 700 0xc3e 5
+conv2d_bf16_params.h 492 0xc48 x
+conv2d_bf16_params.h 497 0xc48 1
+conv2d_bf16_params.h 509 0xc48 2
+conv2d_bf16_params.h 500 0xc52
+conv2d_bf16_params.h 520 0xc52 1 x
+conv2d_bf16_params.h 502 0xc58
+conv2d_bf16_params.h 520 0xc58 1
+conv2d_bf16_params.h 502 0xc62
+conv2d_bf16_params.h 507 0xc62 1 x
+conv2d_bf16_params.h 495 0xc68 x
+conv2d_bf16_params.h 495 0xc6c
+conv2d_bf16_params.h 495 0xc6c 1
+conv2d_bf16_params.h 610 0xc6c 2
+conv2d_bf16_params.h 709 0xc6c 3
+conv2d_bf16_params.h 507 0xc72 x
+conv2d_bf16_params.h 495 0xc76 x
+conv2d_bf16_params.h 495 0xc7a
+conv2d_bf16_params.h 506 0xc7a 1
+conv2d_bf16_params.h 519 0xc7a 2 x
+conv2d_bf16_params.h 496 0xc84 x
+conv2d_bf16_params.h 504 0xc84 1 x
+conv2d_bf16_params.h 522 0xc84 2
+conv2d_bf16_params.h 509 0xc8e x
+conv2d_bf16_params.h 496 0xc94 x
+conv2d_bf16_params.h 520 0xc94 1 x
+conv2d_bf16_params.h 529 0xc94 2
+conv2d_bf16_params.h 497 0xc9e x
+conv2d_bf16_params.h 509 0xc9e 1 x
+conv2d_bf16_params.h 533 0xc9e 2
+conv2d_bf16_params.h 539 0xca8 x
+conv2d_bf16_params.h 499 0xcac x
+conv2d_bf16_params.h 499 0xcb0
+conv2d_bf16_params.h 529 0xcb4 x
+conv2d_bf16_params.h 507 0xcb8 x
+conv2d_bf16_params.h 511 0xcb8 1
+conv2d_bf16_params.h 491 0xcbe x
+conv2d_bf16_params.h 507 0xcbe 1
+conv2d_bf16_params.h 500 0xcc8 x
+conv2d_bf16_params.h 511 0xcc8 1 x
+conv2d_bf16_params.h 500 0xcce
+conv2d_bf16_params.h 534 0xcce 1
+conv2d_bf16_params.h 502 0xcd6 x
+conv2d_bf16_params.h 509 0xcd6 1 x
+conv2d_bf16_params.h 642 0xcd6 2
+conv2d_bf16_params.h 510 0xce2 x
+conv2d_bf16_params.h 506 0xce6 x
+conv2d_bf16_params.h 527 0xcea x
+conv2d_bf16_params.h 502 0xcf4 x
+conv2d_bf16_params.h 502 0xcf8
+conv2d_bf16_params.h 506 0xcfc x
+conv2d_bf16_params.h 506 0xd0c
+conv2d_bf16_params.h 506 0xd10
+conv2d_bf16_params.h 510 0xd14 x
+conv2d_bf16_params.h 510 0xd18
+conv2d_bf16_params.h 510 0xd1e
+conv2d_bf16_params.h 510 0xd22
+conv2d_bf16_params.h 510 0xd28
+conv2d_bf16_params.h 539 0xd28 1
+conv2d_bf16_params.h 642 0xd28 2
+conv2d_bf16_params.h 511 0xd2e x
+conv2d_bf16_params.h 524 0xd2e 1
+conv2d_bf16_params.h 539 0xd2e 2
+conv2d_bf16_params.h 512 0xd34 x
+conv2d_bf16_params.h 524 0xd34 1 x
+conv2d_bf16_params.h 524 0xd3a
+conv2d_bf16_params.h 524 0xd3e
+conv2d_bf16_params.h 520 0xd42 x
+conv2d_bf16_params.h 511 0xd46 x
+conv2d_bf16_params.h 522 0xd46 1 x
+conv2d_bf16_params.h 524 0xd4c x
+conv2d_bf16_params.h 529 0xd4c 1 x
+conv2d_bf16_params.h 539 0xd4c 2 x
+conv2d_bf16_params.h 534 0xd56
+conv2d_bf16_params.h 539 0xd56 1
+conv2d_bf16_params.h 527 0xd5c x
+conv2d_bf16_params.h 533 0xd5c 1 x
+conv2d_bf16_params.h 529 0xd6a x
+conv2d_bf16_params.h 533 0xd6a 1
+conv2d_bf16_params.h 539 0xd70 x
+conv2d_bf16_params.h 529 0xd76 x
+conv2d_bf16_params.h 529 0xd76 1
+conv2d_bf16_params.h 529 0xd7c
+conv2d_bf16_params.h 534 0xd80 x
+conv2d_bf16_params.h 534 0xd84
+conv2d_bf16_params.h 539 0xd84 1 x
+conv2d_bf16_params.h 555 0xd84 2
+conv2d_bf16_params.h 559 0xd84 3
+conv2d_bf16_params.h 700 0xd84 4
+conv2d_bf16_params.h 669 0xd8e
+conv2d_bf16_params.h 700 0xd8e 1
+conv2d_bf16_params.h 539 0xd92
+conv2d_bf16_params.h 539 0xda2
+conv2d_bf16_params.h 539 0xdb2
+conv2d_bf16_params.h 539 0xdb2 1
+conv2d_bf16_params.h 539 0xdb2 2
+conv2d_bf16_params.h 539 0xdb2 3
+conv2d_bf16_params.h 539 0xdbc
+conv2d_bf16_params.h 539 0xdc0
+conv2d_bf16_params.h 539 0xdc4
+conv2d_bf16_params.h 539 0xdc4 1
+conv2d_bf16_params.h 539 0xdca
+conv2d_bf16_params.h 539 0xdce
+conv2d_bf16_params.h 539 0xdd2
+conv2d_bf16_params.h 669 0xdd2 1
+conv2d_bf16_params.h 539 0xdd8
+conv2d_bf16_params.h 539 0xddc
+conv2d_bf16_params.h 539 0xde0
+conv2d_bf16_params.h 539 0xde4
+conv2d_bf16_params.h 555 0xde8 x
+conv2d_bf16_params.h 642 0xdf0
+conv2d_bf16_params.h 669 0xdf0 1
+conv2d_bf16_params.h 669 0xdf0 2
+conv2d_bf16_params.h 669 0xdfa x
+conv2d_bf16_params.h 497 0xdfe x
+conv2d_bf16_params.h 641 0xdfe 1 x
+conv2d_bf16_params.h 645 0xdfe 2
+conv2d_bf16_params.h 559 0xe08 x
+conv2d_bf16_params.h 640 0xe08 1
+conv2d_bf16_params.h 642 0xe08 2
+conv2d_bf16_params.h 642 0xe08 3
+conv2d_bf16_params.h 642 0xe12 x
+conv2d_bf16_params.h 578 0xe16 x
+conv2d_bf16_params.h 640 0xe1a x
+conv2d_bf16_params.h 557 0xe1e
+conv2d_bf16_params.h 645 0xe1e 1
+conv2d_bf16_params.h 641 0xe28 x
+conv2d_bf16_params.h 642 0xe28 1 x
+conv2d_bf16_params.h 642 0xe2e
+conv2d_bf16_params.h 642 0xe2e 1
+conv2d_bf16_params.h 558 0xe32 x
+conv2d_bf16_params.h 645 0xe32 1
+conv2d_bf16_params.h 540 0xe38
+conv2d_bf16_params.h 645 0xe38 1 x
+conv2d_bf16_params.h 540 0xe3e x
+conv2d_bf16_params.h 557 0xe3e 1
+conv2d_bf16_params.h 642 0xe44 x
+conv2d_bf16_params.h 557 0xe48 x
+conv2d_bf16_params.h 655 0xe48 1
+conv2d_bf16_params.h 558 0xe4e
+conv2d_bf16_params.h 655 0xe4e 1 x
+conv2d_bf16_params.h 558 0xe54 x
+conv2d_bf16_params.h 540 0xe58 x
+conv2d_bf16_params.h 655 0xe58 1
+conv2d_bf16_params.h 655 0xe58 2
+conv2d_bf16_params.h 679 0xe58 3
+conv2d_bf16_params.h 655 0xe62 x
+conv2d_bf16_params.h 558 0xe66 x
+conv2d_bf16_params.h 655 0xe66 1
+conv2d_bf16_params.h 655 0xe66 2
+conv2d_bf16_params.h 679 0xe66 3
+conv2d_bf16_params.h 655 0xe70 x
+conv2d_bf16_params.h 126 0xe74 x
+conv2d_bf16_params.h 559 0xe74 1 x
+conv2d_bf16_params.h 669 0xe7a x
+conv2d_bf16_params.h 700 0xe7a 1
+conv2d_bf16_params.h 558 0xe80 x
+conv2d_bf16_params.h 700 0xe86 x
+conv2d_bf16_params.h 578 0xe8a x
+conv2d_bf16_params.h 559 0xe8e x
+conv2d_bf16_params.h 578 0xe92 x
+conv2d_bf16_params.h 610 0xe96 x
+conv2d_bf16_params.h 611 0xe96 1
+conv2d_bf16_params.h 621 0xe96 2
+conv2d_bf16_params.h 621 0xe96 3
+conv2d_bf16_params.h 629 0xe96 4
+conv2d_bf16_params.h 621 0xea2
+conv2d_bf16_params.h 621 0xea2 1 x
+conv2d_bf16_params.h 645 0xea2 2
+conv2d_bf16_params.h 649 0xea2 3
+conv2d_bf16_params.h 645 0xea8
+conv2d_bf16_params.h 554 0xeae x
+conv2d_bf16_params.h 645 0xeae 1 x
+conv2d_bf16_params.h 554 0xeb8
+conv2d_bf16_params.h 555 0xeb8 1
+conv2d_bf16_params.h 555 0xeb8 2 x
+conv2d_bf16_params.h 645 0xeb8 3
+conv2d_bf16_params.h 555 0xec4
+conv2d_bf16_params.h 621 0xec4 1
+conv2d_bf16_params.h 621 0xec4 2 x
+conv2d_bf16_params.h 645 0xec4 3
+conv2d_bf16_params.h 558 0xece x
+conv2d_bf16_params.h 559 0xece 1
+conv2d_bf16_params.h 621 0xece 2
+conv2d_bf16_params.h 621 0xece 3
+conv2d_bf16_params.h 645 0xece 4
+conv2d_bf16_params.h 559 0xeda x
+conv2d_bf16_params.h 621 0xeda 1 x
+conv2d_bf16_params.h 645 0xeda 2 x
+conv2d_bf16_params.h 610 0xee0 x
+conv2d_bf16_params.h 621 0xee0 1
+conv2d_bf16_params.h 655 0xee0 2
+conv2d_bf16_params.h 679 0xee0 3
+conv2d_bf16_params.h 621 0xeec
+conv2d_bf16_params.h 649 0xeec 1
+conv2d_bf16_params.h 655 0xeec 2 x
+conv2d_bf16_params.h 661 0xeec 3
+conv2d_bf16_params.h 127 0xef6 x
+conv2d_bf16_params.h 127 0xef6 1 x
+conv2d_bf16_params.h 621 0xef6 2
+conv2d_bf16_params.h 649 0xef6 3
+conv2d_bf16_params.h 655 0xef6 4
+conv2d_bf16_params.h 679 0xef6 5
+conv2d_bf16_params.h 710 0xef6 6
+conv2d_bf16_params.h 710 0xef6 7
+conv2d_bf16_params.h 655 0xf00 x
+conv2d_bf16_params.h 679 0xf00 1 x
+conv2d_bf16_params.h 621 0xf06 x
+conv2d_bf16_params.h 649 0xf06 1 x
+conv2d_bf16_params.h 655 0xf06 2
+conv2d_bf16_params.h 655 0xf06 3
+conv2d_bf16_params.h 700 0xf06 4
+conv2d_bf16_params.h 700 0xf06 5
+conv2d_bf16_params.h 655 0xf10 x
+conv2d_bf16_params.h 700 0xf10 1 x
+conv2d_bf16_params.h 629 0xf14 x
+conv2d_bf16_params.h 611 0xf18 x
+conv2d_bf16_params.h 643 0xf26 x
+conv2d_bf16_params.h 664 0xf2a
+conv2d_bf16_params.h 621 0xf30 x
+conv2d_bf16_params.h 629 0xf30 1
+conv2d_bf16_params.h 684 0xf30 2 x
+conv2d_bf16_params.h 629 0xf3a x
+conv2d_bf16_params.h 127 0xf40 x
+conv2d_bf16_params.h 644 0xf40 1
+conv2d_bf16_params.h 700 0xf40 2 x
+conv2d_bf16_params.h 705 0xf40 3
+conv2d_bf16_params.h 705 0xf40 4
+conv2d_bf16_params.h 645 0xf4a x
+conv2d_bf16_params.h 700 0xf4a 1
+conv2d_bf16_params.h 700 0xf4a 2
+conv2d_bf16_params.h 705 0xf4a 3
+conv2d_bf16_params.h 644 0xf54
+conv2d_bf16_params.h 649 0xf54 1 x
+conv2d_bf16_params.h 674 0xf54 2
+conv2d_bf16_params.h 644 0xf5e x
+conv2d_bf16_params.h 662 0xf5e 1
+conv2d_bf16_params.h 664 0xf5e 2 x
+conv2d_bf16_params.h 127 0xf68 x
+conv2d_bf16_params.h 663 0xf68 1 x
+conv2d_bf16_params.h 664 0xf68 2
+conv2d_bf16_params.h 126 0xf6e x
+conv2d_bf16_params.h 664 0xf6e 1 x
+conv2d_bf16_params.h 126 0xf74
+conv2d_bf16_params.h 664 0xf74 1
+conv2d_bf16_params.h 127 0xf7a x
+conv2d_bf16_params.h 127 0xf7a 1 x
+conv2d_bf16_params.h 664 0xf7a 2
+conv2d_bf16_params.h 664 0xf7a 3
+conv2d_bf16_params.h 675 0xf7a 4
+conv2d_bf16_params.h 696 0xf7a 5
+conv2d_bf16_params.h 644 0xf84 x
+conv2d_bf16_params.h 664 0xf84 1 x
+conv2d_bf16_params.h 705 0xf84 2
+conv2d_bf16_params.h 664 0xf8e
+conv2d_bf16_params.h 705 0xf8e 1 x
+conv2d_bf16_params.h 705 0xf8e 2 x
+conv2d_bf16_params.h 127 0xf94
+conv2d_bf16_params.h 674 0xf94 1 x
+conv2d_bf16_params.h 675 0xf94 2 x
+conv2d_bf16_params.h 682 0xf94 3
+conv2d_bf16_params.h 718 0xf94 4
+conv2d_bf16_params.h 720 0xf94 5
+conv2d_bf16_params.h 127 0xf9e x
+conv2d_bf16_params.h 642 0xf9e 1
+conv2d_bf16_params.h 675 0xf9e 2
+conv2d_bf16_params.h 675 0xfa8 x
+conv2d_bf16_params.h 707 0xfa8 1 x
+conv2d_bf16_params.h 642 0xfae
+conv2d_bf16_params.h 674 0xfae 1 x
+conv2d_bf16_params.h 675 0xfae 2
+conv2d_bf16_params.h 642 0xfb8 x
+conv2d_bf16_params.h 655 0xfb8 1
+conv2d_bf16_params.h 655 0xfb8 2
+conv2d_bf16_params.h 675 0xfb8 3 x
+conv2d_bf16_params.h 679 0xfb8 4
+conv2d_bf16_params.h 679 0xfb8 5
+conv2d_bf16_params.h 655 0xfc4 x
+conv2d_bf16_params.h 679 0xfc4 1 x
+conv2d_bf16_params.h 713 0xfc4 2
+conv2d_bf16_params.h 691 0xfca x
+conv2d_bf16_params.h 675 0xfce
+conv2d_bf16_params.h 675 0xfce 1 x
+conv2d_bf16_params.h 709 0xfce 2 x
+conv2d_bf16_params.h 675 0xfd8
+conv2d_bf16_params.h 706 0xfd8 1 x
+conv2d_bf16_params.h 706 0xfd8 2
+conv2d_bf16_params.h 709 0xfd8 3
+conv2d_bf16_params.h 682 0xfe4 x
+conv2d_bf16_params.h 706 0xfe4 1
+conv2d_bf16_params.h 126 0xfea x
+conv2d_bf16_params.h 696 0xfea 1 x
+conv2d_bf16_params.h 127 0xff0 x
+conv2d_bf16_params.h 127 0xff0 1 x
+conv2d_bf16_params.h 696 0xff0 2
+conv2d_bf16_params.h 696 0xff6 x
+conv2d_bf16_params.h 713 0xff6 1 x
+conv2d_bf16_params.h 696 0xffc
+conv2d_bf16_params.h 706 0xffc 1
+conv2d_bf16_params.h 706 0xffc 2 x
+conv2d_bf16_params.h 706 0x1006
+conv2d_bf16_params.h 696 0x100a x
+conv2d_bf16_params.h 707 0x100a 1 x
+conv2d_bf16_params.h 696 0x1010
+conv2d_bf16_params.h 709 0x1010 1 x
+conv2d_bf16_params.h 696 0x1016 x
+conv2d_bf16_params.h 709 0x1016 1
+conv2d_bf16_params.h 707 0x1020 x
+conv2d_bf16_params.h 708 0x1020 1
+conv2d_bf16_params.h 710 0x1020 2 x
+conv2d_bf16_params.h 710 0x1020 3 x
+conv2d_bf16_params.h 708 0x102c x
+conv2d_bf16_params.h 713 0x102c 1 x
+conv2d_bf16_params.h 709 0x1036 x
+conv2d_bf16_params.h 800 0x1036 1 x
+conv2d_bf16_params.h 710 0x103c x
+conv2d_bf16_params.h 718 0x1044 x
+conv2d_bf16_params.h 718 0x1048
+conv2d_bf16_params.h 720 0x104c x
+conv2d_bf16_params.h 800 0x104c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1060
+utils.h 531 0x1060 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 689 0x1060 2 x
+conv2d_bf16.h 698 0x1060 3
+conv2d_bf16.h 704 0x1060 4
+conv2d_bf16.h 707 0x1060 5
+conv2d_bf16.h 707 0x1060 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x106c
+utils.h 526 0x106c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 698 0x106c 2 x
+conv2d_bf16.h 704 0x106c 3 x
+conv2d_bf16.h 707 0x106c 4
+conv2d_bf16.h 707 0x106c 5
+conv2d_bf16.h 698 0x107a
+conv2d_bf16.h 702 0x107a 1
+conv2d_bf16.h 698 0x1084
+conv2d_bf16.h 702 0x1084 1 x
+conv2d_bf16.h 699 0x108e x
+conv2d_bf16.h 702 0x108e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1098
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 702 0x1098 1 x
+conv2d_bf16.h 702 0x109e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x10a6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10a6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 704 0x10b0 x
+conv2d_bf16.h 702 0x10b4 x
+conv2d_bf16.h 705 0x10b4 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ba x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x10ba 1
+conv2d_bf16.h 707 0x10ba 2
+conv2d_bf16.h 704 0x10c0 x
+conv2d_bf16.h 705 0x10c6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10d0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x10e0 1 x
+conv2d_bf16.h 704 0x10f0 x
+conv2d_bf16.h 705 0x1100 x
+conv2d_bf16.h 707 0x1100 1 x
+conv2d_bf16.h 707 0x1100 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1110 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1110 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1110 2
+conv2d_bf16.h 708 0x1110 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1120
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1120 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x1120 2 x
+conv2d_bf16.h 707 0x1132 x
+conv2d_bf16.h 707 0x1132 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1136 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1136 1 x
+conv2d_bf16.h 708 0x1136 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x113e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x113e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1142 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1146 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1146 1 x
+conv2d_bf16.h 707 0x1146 2 x
+conv2d_bf16.h 707 0x1146 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x114e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x114e 1 x
+conv2d_bf16.h 708 0x114e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1156 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x115a x
+conv2d_bf16.h 707 0x115a 1 x
+conv2d_bf16.h 723 0x115a 2 x
+conv2d_bf16.h 708 0x1160 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1164 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1170
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x1170 1
+conv2d_bf16.h 1836 0x1170 2 x
+conv2d_bf16.h 1836 0x1170 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 240 0x1170 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x117e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 241 0x117e 1
+conv2d_bf16_params.h 242 0x117e 2
+conv2d_bf16_params.h 250 0x117e 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 866 0x118a
+conv2d_bf16.h 876 0x118a 1
+conv2d_bf16.h 876 0x118a 2
+conv2d_bf16.h 881 0x118a 3
+conv2d_bf16.h 1836 0x118a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 242 0x118a 5
+conv2d_bf16_params.h 242 0x118a 6
+conv2d_bf16_params.h 242 0x118a 7
+conv2d_bf16_params.h 242 0x118a 8
+conv2d_bf16_params.h 242 0x118a 9
+conv2d_bf16_params.h 243 0x118a 10
+conv2d_bf16_params.h 245 0x118a 11
+conv2d_bf16_params.h 250 0x118a 12
+conv2d_bf16_params.h 250 0x118a 13
+conv2d_bf16_params.h 240 0x1196
+conv2d_bf16_params.h 240 0x1196 1 x
+conv2d_bf16_params.h 242 0x11a2
+conv2d_bf16_params.h 245 0x11a2 1
+conv2d_bf16_params.h 242 0x11ae
+conv2d_bf16_params.h 244 0x11ae 1
+conv2d_bf16_params.h 244 0x11ae 2
+conv2d_bf16_params.h 249 0x11ae 3
+conv2d_bf16_params.h 243 0x11ba
+conv2d_bf16_params.h 244 0x11ba 1
+conv2d_bf16_params.h 250 0x11ba 2
+conv2d_bf16_params.h 244 0x11c6
+conv2d_bf16_params.h 240 0x11d4
+conv2d_bf16_params.h 240 0x11d8
+conv2d_bf16_params.h 241 0x11d8 1 x
+conv2d_bf16_params.h 242 0x11de x
+conv2d_bf16_params.h 242 0x11de 1 x
+conv2d_bf16_params.h 245 0x11e4 x
+conv2d_bf16_params.h 242 0x11f2 x
+conv2d_bf16_params.h 242 0x11f6
+conv2d_bf16_params.h 242 0x11fa
+conv2d_bf16_params.h 241 0x11fe x
+conv2d_bf16_params.h 242 0x11fe 1
+conv2d_bf16_params.h 242 0x1204 x
+conv2d_bf16_params.h 242 0x1208
+conv2d_bf16_params.h 242 0x120c
+conv2d_bf16_params.h 242 0x1210
+conv2d_bf16_params.h 242 0x1210 1
+conv2d_bf16_params.h 242 0x1216
+conv2d_bf16_params.h 243 0x121a x
+conv2d_bf16_params.h 242 0x121e x
+conv2d_bf16_params.h 243 0x121e 1
+conv2d_bf16_params.h 244 0x1224 x
+conv2d_bf16_params.h 245 0x1224 1 x
+conv2d_bf16_params.h 244 0x1236
+conv2d_bf16_params.h 244 0x1236 1
+conv2d_bf16_params.h 245 0x123c
+conv2d_bf16_params.h 244 0x1242
+conv2d_bf16_params.h 244 0x1246
+conv2d_bf16_params.h 244 0x124a
+conv2d_bf16_params.h 244 0x124e
+conv2d_bf16_params.h 244 0x1252
+conv2d_bf16_params.h 245 0x1256
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 876 0x1268
+conv2d_bf16.h 876 0x1268 1
+conv2d_bf16.h 1849 0x1276
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 250 0x1280 x
+conv2d_bf16_params.h 250 0x1280 1
+conv2d_bf16_params.h 250 0x128c
+conv2d_bf16_params.h 250 0x1290
+conv2d_bf16_params.h 250 0x1294
+conv2d_bf16_params.h 250 0x1298
+conv2d_bf16_params.h 250 0x1298 1
+conv2d_bf16_params.h 250 0x129e
+conv2d_bf16_params.h 249 0x12a2 x
+conv2d_bf16_params.h 249 0x12a6
+conv2d_bf16_params.h 250 0x12aa x
+conv2d_bf16_params.h 258 0x12b0 x
+conv2d_bf16_params.h 259 0x12c8
+conv2d_bf16_params.h 259 0x12ce x
+conv2d_bf16_params.h 259 0x12d2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x12e0 x
+conv2d_bf16.h 1849 0x12e0 1
+conv2d_bf16.h 1849 0x12e0 2 x
+conv2d_bf16.h 876 0x12ea
+conv2d_bf16.h 881 0x12ea 1
+conv2d_bf16.h 1841 0x12ea 2
+conv2d_bf16.h 1842 0x12ea 3
+conv2d_bf16.h 1842 0x12ea 4
+conv2d_bf16.h 1842 0x12ea 5
+conv2d_bf16.h 1845 0x12ea 6
+conv2d_bf16.h 1849 0x12ea 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x12ea 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x12f4 x
+conv2d_bf16.h 1842 0x12f4 1
+conv2d_bf16.h 1849 0x12f4 2
+conv2d_bf16.h 862 0x1300
+conv2d_bf16.h 1842 0x1300 1
+conv2d_bf16.h 1845 0x1300 2
+conv2d_bf16.h 1845 0x130c x
+conv2d_bf16.h 862 0x1310 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1314 x
+io_buffer_main.h 125 0x1318
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x1318 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x131e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x1322 x
+conv2d_bf16.h 1845 0x1328 x
+conv2d_bf16.h 866 0x132c x
+conv2d_bf16.h 866 0x1330
+conv2d_bf16.h 1842 0x1336 x
+conv2d_bf16.h 1842 0x1336 1 x
+conv2d_bf16.h 1842 0x133c
+conv2d_bf16.h 1845 0x133c 1 x
+conv2d_bf16.h 1841 0x1342 x
+conv2d_bf16.h 881 0x134a
+conv2d_bf16.h 885 0x134a 1
+conv2d_bf16.h 1845 0x134e x
+conv2d_bf16.h 867 0x1352
+conv2d_bf16.h 867 0x1358
+conv2d_bf16.h 867 0x1358 1 x
+conv2d_bf16.h 867 0x1360
+conv2d_bf16.h 867 0x1366
+conv2d_bf16.h 867 0x1372
+conv2d_bf16.h 867 0x1372 1
+conv2d_bf16.h 867 0x1378
+conv2d_bf16.h 867 0x137c
+conv2d_bf16.h 867 0x1382
+conv2d_bf16.h 867 0x138a
+conv2d_bf16.h 881 0x13a0
+conv2d_bf16.h 883 0x13a0 1
+conv2d_bf16.h 884 0x13a0 2
+conv2d_bf16.h 876 0x13ac x
+conv2d_bf16.h 876 0x13ac 1 x
+conv2d_bf16.h 881 0x13ac 2 x
+conv2d_bf16.h 883 0x13ac 3
+conv2d_bf16.h 884 0x13ac 4
+conv2d_bf16.h 885 0x13b8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13b8 1
+conv2d_bf16_params.h 243 0x13b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 883 0x13c2 x
+conv2d_bf16.h 884 0x13c8 x
+conv2d_bf16.h 876 0x13ce x
+conv2d_bf16.h 876 0x13d2
+conv2d_bf16.h 881 0x13d6 x
+conv2d_bf16.h 881 0x13da
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13da 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 881 0x13e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 885 0x13f0
+conv2d_bf16.h 885 0x13f4 x
+conv2d_bf16.h 885 0x13fe
+conv2d_bf16.h 885 0x1402
+conv2d_bf16.h 885 0x1406
+conv2d_bf16.h 896 0x1410
+conv2d_bf16.h 1115 0x1410 1
+conv2d_bf16.h 1115 0x1410 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x141a
+vector.hpp 1152 0x141a 1
+vector.hpp 1152 0x141a 2
+vector.hpp 1152 0x141a 3
+vector.hpp 1152 0x141a 4
+vector.hpp 1152 0x141a 5
+vector.hpp 1152 0x141a 6
+vector.hpp 1152 0x141a 7
+vector.hpp 1152 0x141a 8
+vector.hpp 1152 0x141a 9
+vector.hpp 1152 0x141a 10
+vector.hpp 1152 0x141a 11
+vector.hpp 1152 0x141a 12
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x141a 13
+accum.hpp 149 0x141a 14
+accum.hpp 149 0x141a 15
+accum.hpp 149 0x141a 16
+accum.hpp 149 0x141a 17
+accum.hpp 149 0x141a 18
+accum.hpp 149 0x141a 19
+accum.hpp 149 0x141a 20
+accum.hpp 149 0x141a 21
+accum.hpp 149 0x141a 22
+accum.hpp 149 0x141a 23
+accum.hpp 149 0x141a 24
+accum.hpp 149 0x141a 25
+accum.hpp 149 0x141a 26
+accum.hpp 149 0x141a 27
+accum.hpp 149 0x141a 28
+accum.hpp 1110 0x141a 29
+accum.hpp 1110 0x141a 30
+accum.hpp 1110 0x141a 31
+accum.hpp 1110 0x141a 32
+accum.hpp 1110 0x141a 33
+accum.hpp 1110 0x141a 34
+accum.hpp 1110 0x141a 35
+accum.hpp 1110 0x141a 36
+accum.hpp 1110 0x141a 37
+accum.hpp 1110 0x141a 38
+accum.hpp 1110 0x141a 39
+accum.hpp 1110 0x141a 40
+accum.hpp 1110 0x141a 41
+accum.hpp 1110 0x141a 42
+accum.hpp 1110 0x141a 43
+accum.hpp 1110 0x141a 44
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 886 0x141a 45
+conv2d_bf16.h 896 0x141a 46 x
+conv2d_bf16.h 1123 0x141a 47
+conv2d_bf16.h 896 0x1420
+conv2d_bf16.h 896 0x1424
+conv2d_bf16.h 896 0x1428
+conv2d_bf16.h 896 0x142c
+conv2d_bf16.h 896 0x1430
+conv2d_bf16.h 896 0x1434
+conv2d_bf16.h 897 0x1438 x
+conv2d_bf16.h 897 0x143c
+conv2d_bf16.h 897 0x1440
+conv2d_bf16.h 897 0x1444
+conv2d_bf16.h 897 0x1448
+conv2d_bf16.h 897 0x144c
+conv2d_bf16.h 897 0x1450
+conv2d_bf16.h 898 0x1454 x
+conv2d_bf16.h 898 0x1458
+conv2d_bf16.h 898 0x145c
+conv2d_bf16.h 898 0x1460
+conv2d_bf16.h 898 0x1464
+conv2d_bf16.h 898 0x1468
+conv2d_bf16.h 1115 0x146c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1470
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 898 0x1474 x
+conv2d_bf16.h 1115 0x1480 x
+conv2d_bf16.h 1115 0x1484
+conv2d_bf16.h 886 0x148a
+conv2d_bf16.h 886 0x1490 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1494 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x149c
+conv2d_bf16.h 1123 0x149c 1
+conv2d_bf16.h 1123 0x149c 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14a6
+aie_core.h 100 0x14a6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14a6 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14a6 3
+accum.hpp 946 0x14a6 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14a6 5
+conv2d_bf16.h 1125 0x14a6 6
+conv2d_bf16.h 1154 0x14a6 7
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14b0
+aie_core.h 100 0x14b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14b0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14b0 3
+accum.hpp 946 0x14b0 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14b0 5
+conv2d_bf16.h 1125 0x14b0 6
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14ba
+aie_core.h 100 0x14ba 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14ba 2
+vector.hpp 1152 0x14ba 3
+vector.hpp 1152 0x14ba 4
+vector.hpp 1152 0x14ba 5
+vector.hpp 1152 0x14ba 6
+vector.hpp 1152 0x14ba 7
+vector.hpp 1152 0x14ba 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x14ba 9
+accum.hpp 149 0x14ba 10
+accum.hpp 149 0x14ba 11
+accum.hpp 149 0x14ba 12
+accum.hpp 149 0x14ba 13
+accum.hpp 149 0x14ba 14
+accum.hpp 149 0x14ba 15
+accum.hpp 149 0x14ba 16
+accum.hpp 578 0x14ba 17
+accum.hpp 946 0x14ba 18
+accum.hpp 1110 0x14ba 19
+accum.hpp 1110 0x14ba 20
+accum.hpp 1110 0x14ba 21
+accum.hpp 1110 0x14ba 22
+accum.hpp 1110 0x14ba 23
+accum.hpp 1110 0x14ba 24
+accum.hpp 1110 0x14ba 25
+accum.hpp 1110 0x14ba 26
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x14ba 27
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14c6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x14c6 1
+conv2d_bf16.h 1187 0x14c6 2
+conv2d_bf16.h 1199 0x14c6 3
+conv2d_bf16.h 1200 0x14c6 4
+conv2d_bf16.h 1201 0x14c6 5
+conv2d_bf16.h 1202 0x14c6 6
+conv2d_bf16.h 1143 0x14d2
+conv2d_bf16.h 1218 0x14d2 1
+conv2d_bf16.h 749 0x14dc
+conv2d_bf16.h 750 0x14dc 1
+conv2d_bf16.h 751 0x14dc 2
+conv2d_bf16.h 752 0x14dc 3
+conv2d_bf16.h 1123 0x14dc 4
+conv2d_bf16.h 736 0x14e6
+conv2d_bf16.h 738 0x14e6 1
+conv2d_bf16.h 1123 0x14e6 2
+conv2d_bf16.h 1873 0x14e6 3
+conv2d_bf16.h 1125 0x14f2 x
+conv2d_bf16.h 1125 0x14f6
+conv2d_bf16.h 1125 0x14fa
+conv2d_bf16.h 1149 0x14fe x
+conv2d_bf16.h 1154 0x1502 x
+conv2d_bf16.h 743 0x1506 x
+conv2d_bf16.h 745 0x150a x
+conv2d_bf16.h 746 0x150e x
+conv2d_bf16.h 1125 0x150e 1 x
+conv2d_bf16.h 1143 0x1514 x
+conv2d_bf16.h 1206 0x1518 x
+conv2d_bf16.h 1149 0x151c
+conv2d_bf16.h 1154 0x1524
+conv2d_bf16.h 1125 0x1528 x
+conv2d_bf16.h 1149 0x152c x
+conv2d_bf16.h 1154 0x1530 x
+conv2d_bf16.h 1287 0x1536
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1540 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1540 1 x
+accum.hpp 946 0x1540 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x1540 3
+conv2d_bf16.h 738 0x1540 4
+conv2d_bf16.h 1147 0x1540 5 x
+conv2d_bf16.h 1187 0x1540 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x154c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x154c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x154c 2
+accum.hpp 946 0x154c 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x154c 4 x
+conv2d_bf16.h 738 0x154c 5 x
+conv2d_bf16.h 1188 0x154c 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1558
+aie_core.h 100 0x1558 1
+aie_core.h 100 0x1558 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1558 3
+vector.hpp 1139 0x1558 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1558 5
+accum.hpp 578 0x1558 6
+accum.hpp 946 0x1558 7
+accum.hpp 946 0x1558 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1558 9 x
+conv2d_bf16.h 742 0x1558 10 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1564
+aie_core.h 100 0x1564 1
+aie_core.h 100 0x1564 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1564 3
+vector.hpp 1139 0x1564 4
+vector.hpp 1139 0x1564 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1564 6
+accum.hpp 578 0x1564 7
+accum.hpp 578 0x1564 8 x
+accum.hpp 946 0x1564 9
+accum.hpp 946 0x1564 10
+accum.hpp 946 0x1564 11 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1564 12 x
+conv2d_bf16.h 1149 0x1564 13 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1570
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1570 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1570 2
+accum.hpp 946 0x1570 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1570 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x1570 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x157a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x157a 1 x
+accum.hpp 946 0x157a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x157a 3 x
+conv2d_bf16.h 1152 0x157a 4 x
+conv2d_bf16.h 1206 0x157a 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1586
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1586 1
+accum.hpp 946 0x1586 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1586 3 x
+conv2d_bf16.h 1154 0x1586 4 x
+conv2d_bf16.h 1206 0x1586 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1592 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1592 1 x
+accum.hpp 946 0x1592 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1592 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1598
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1598 1
+accum.hpp 946 0x1598 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1598 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1157 0x1598 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x159e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x159e 1 x
+accum.hpp 946 0x159e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x159e 3 x
+conv2d_bf16.h 1159 0x159e 4 x
+conv2d_bf16.h 737 0x15a4 x
+conv2d_bf16.h 738 0x15a4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15aa x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15aa 1 x
+accum.hpp 946 0x15aa 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x15aa 3 x
+conv2d_bf16.h 1192 0x15aa 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15b0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15b0 1
+accum.hpp 946 0x15b0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x15b0 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15b0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ba
+vector.hpp 1139 0x15ba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ba 2
+accum.hpp 578 0x15ba 3 x
+accum.hpp 946 0x15ba 4
+accum.hpp 946 0x15ba 5 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x15ba 6 x
+conv2d_bf16.h 746 0x15ba 7 x
+conv2d_bf16.h 1162 0x15ba 8
+conv2d_bf16.h 737 0x15c6 x
+conv2d_bf16.h 742 0x15c6 1 x
+conv2d_bf16.h 749 0x15c6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15d0 x
+aie_core.h 143 0x15d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15d0 2 x
+vector.hpp 1152 0x15d0 3
+vector.hpp 1152 0x15d0 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15d0 5 x
+accum.hpp 946 0x15d0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x15d0 7 x
+conv2d_bf16.h 1286 0x15d0 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15de
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15de 1
+vector.hpp 1139 0x15de 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15de 3
+accum.hpp 578 0x15de 4
+accum.hpp 946 0x15de 5
+accum.hpp 946 0x15de 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15de 7 x
+conv2d_bf16.h 751 0x15de 8 x
+conv2d_bf16.h 1162 0x15de 9 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15ec
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ec 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ec 2
+accum.hpp 946 0x15ec 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x15ec 4 x
+conv2d_bf16.h 746 0x15ec 5 x
+conv2d_bf16.h 1199 0x15ec 6 x
+conv2d_bf16.h 738 0x15fa x
+conv2d_bf16.h 1200 0x15fa 1 x
+conv2d_bf16.h 742 0x1602 x
+conv2d_bf16.h 1201 0x1602 1 x
+conv2d_bf16.h 743 0x160a x
+conv2d_bf16.h 752 0x160a 1 x
+conv2d_bf16.h 738 0x1612 x
+conv2d_bf16.h 740 0x1612 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1618 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x161c x
+conv2d_bf16.h 742 0x161c 1 x
+conv2d_bf16.h 1202 0x161c 2 x
+conv2d_bf16.h 1206 0x161c 3 x
+conv2d_bf16.h 737 0x1628 x
+conv2d_bf16.h 743 0x1628 1 x
+conv2d_bf16.h 749 0x1628 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1632
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1632 1 x
+conv2d_bf16.h 740 0x1632 2 x
+conv2d_bf16.h 751 0x1632 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1640 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1640 1 x
+conv2d_bf16.h 750 0x1640 2 x
+conv2d_bf16.h 736 0x1650 x
+conv2d_bf16.h 742 0x1650 1 x
+conv2d_bf16.h 746 0x1650 2 x
+conv2d_bf16.h 752 0x1650 3 x
+conv2d_bf16.h 737 0x1660 x
+conv2d_bf16.h 743 0x1660 1 x
+conv2d_bf16.h 749 0x1660 2 x
+conv2d_bf16.h 738 0x1670 x
+conv2d_bf16.h 740 0x1670 1 x
+conv2d_bf16.h 751 0x1670 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1680 1 x
+conv2d_bf16.h 750 0x1680 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1690
+aie_core.h 100 0x1690 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1690 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1690 3
+accum.hpp 946 0x1690 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 742 0x1690 5 x
+conv2d_bf16.h 746 0x1690 6 x
+conv2d_bf16.h 752 0x1690 7 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x169e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x169e 1
+vector.hpp 1152 0x169e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x169e 3 x
+conv2d_bf16.h 749 0x169e 4 x
+conv2d_bf16.h 1286 0x169e 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ac 1
+vector.hpp 1152 0x16ac 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16ac 3 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16b6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x16b6 1 x
+conv2d_bf16.h 750 0x16b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16c0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x16c0 1 x
+conv2d_bf16.h 752 0x16c0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ca
+vector.hpp 1152 0x16ca 1
+vector.hpp 1152 0x16ca 2
+vector.hpp 1152 0x16ca 3
+vector.hpp 1152 0x16ca 4
+vector.hpp 1152 0x16ca 5
+vector.hpp 1152 0x16ca 6
+vector.hpp 1152 0x16ca 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16ca 8 x
+conv2d_bf16.h 1285 0x16ca 9 x
+conv2d_bf16.h 1286 0x16ca 10
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x16d6
+aie_core.h 100 0x16d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x16d6 2
+vector.hpp 1152 0x16d6 3
+vector.hpp 1152 0x16d6 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x16d6 5
+accum.hpp 946 0x16d6 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16d6 7 x
+conv2d_bf16.h 746 0x16e0 x
+conv2d_bf16.h 750 0x16e0 1 x
+conv2d_bf16.h 745 0x16e8 x
+conv2d_bf16.h 752 0x16e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16f0
+aie_core.h 143 0x16f4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x16f4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16fc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16fc 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1704 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 752 0x1704 1 x
+conv2d_bf16.h 1286 0x1704 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x170e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x170e 1
+vector.hpp 1152 0x170e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x170e 3 x
+conv2d_bf16.h 1286 0x170e 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x171a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x171a 1
+vector.hpp 1152 0x171a 2
+vector.hpp 1152 0x171a 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x171a 4
+accum.hpp 946 0x171a 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1722
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1722 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x172a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x172a 1 x
+accum.hpp 1110 0x172a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1732
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1732 1
+accum.hpp 1110 0x1732 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x173a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x173a 1
+conv2d_bf16.h 1287 0x173a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1744 x
+accum.hpp 1110 0x1744 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1744 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x174c
+accum.hpp 1110 0x174c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1218 0x174c 2 x
+conv2d_bf16.h 1287 0x174c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1758 x
+accum.hpp 1110 0x1758 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1758 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1760
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1760 1
+accum.hpp 1110 0x1760 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1768
+vector.hpp 1152 0x1768 1
+vector.hpp 1152 0x1768 2
+vector.hpp 1152 0x1768 3
+vector.hpp 1152 0x1768 4
+vector.hpp 1152 0x1768 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1768 6
+accum.hpp 1110 0x1768 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1772
+vector.hpp 1152 0x1772 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1772 2 x
+accum.hpp 1110 0x1772 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1286 0x1772 4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x177a
+aie_core.h 143 0x177a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x177a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x177a 3
+accum.hpp 946 0x177a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1187 0x177a 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1782 x
+max_min.hpp 20 0x1786
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x178a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x178a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1792
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1792 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x179a x
+vector.hpp 1152 0x17a4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17a4 1 x
+max_min.hpp 20 0x17ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17c0 x
+vector.hpp 1152 0x17d0
+vector.hpp 1152 0x17d4
+vector.hpp 1152 0x17d8
+vector.hpp 1152 0x17dc
+vector.hpp 1152 0x17e0
+vector.hpp 1152 0x17e4
+vector.hpp 1152 0x17e8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17f0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17f0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1143 0x17f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17fc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x17fc 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x17fc 2
+accum.hpp 946 0x17fc 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1800
+aie_core.h 100 0x1804 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1804 1
+vector.hpp 1152 0x1804 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x180a
+aie_core.h 143 0x1820
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x1820 1
+conv2d_bf16.h 1364 0x1820 2
+conv2d_bf16.h 1364 0x1820 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x182c
+aie_core.h 143 0x182c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x182c 2
+vector.hpp 1152 0x182c 3
+vector.hpp 1152 0x182c 4
+vector.hpp 1152 0x182c 5
+vector.hpp 1152 0x182c 6
+vector.hpp 1152 0x182c 7
+vector.hpp 1152 0x182c 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x182c 9
+accum.hpp 149 0x182c 10
+accum.hpp 149 0x182c 11
+accum.hpp 149 0x182c 12
+accum.hpp 149 0x182c 13
+accum.hpp 149 0x182c 14
+accum.hpp 149 0x182c 15
+accum.hpp 149 0x182c 16
+accum.hpp 1110 0x182c 17
+accum.hpp 1110 0x182c 18
+accum.hpp 1110 0x182c 19
+accum.hpp 1110 0x182c 20
+accum.hpp 1110 0x182c 21
+accum.hpp 1110 0x182c 22
+accum.hpp 1110 0x182c 23
+accum.hpp 1110 0x182c 24
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x182c 25
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1838
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 807 0x1838 1
+conv2d_bf16.h 808 0x1838 2
+conv2d_bf16.h 809 0x1838 3
+conv2d_bf16.h 810 0x1838 4
+conv2d_bf16.h 1436 0x1838 5
+conv2d_bf16.h 1437 0x1838 6
+conv2d_bf16.h 1438 0x1838 7
+conv2d_bf16.h 1439 0x1838 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1842
+aie_core.h 143 0x1842 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 802 0x1842 2
+conv2d_bf16.h 1428 0x1842 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x184e
+aie_core.h 143 0x184e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x184e 2
+conv2d_bf16.h 794 0x184e 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x185a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 794 0x185a 1
+conv2d_bf16.h 1455 0x185a 2
+conv2d_bf16.h 1337 0x1864
+conv2d_bf16.h 1364 0x186e x
+conv2d_bf16.h 1873 0x186e 1
+conv2d_bf16.h 1364 0x1874
+conv2d_bf16.h 1369 0x1878 x
+conv2d_bf16.h 799 0x187c x
+conv2d_bf16.h 801 0x1880 x
+conv2d_bf16.h 802 0x1884 x
+conv2d_bf16.h 1337 0x1888 x
+conv2d_bf16.h 1443 0x188c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1892
+vector.hpp 1152 0x1892 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x1892 2
+conv2d_bf16.h 1364 0x1896
+conv2d_bf16.h 1518 0x1896 1
+conv2d_bf16.h 1364 0x189a
+conv2d_bf16.h 1364 0x189e x
+conv2d_bf16.h 1369 0x18a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x18a8
+vector.hpp 1152 0x18a8 1
+vector.hpp 1139 0x18b0
+vector.hpp 1139 0x18b0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18b0 2
+accum.hpp 578 0x18b0 3
+accum.hpp 578 0x18b0 4 x
+accum.hpp 946 0x18b0 5
+accum.hpp 946 0x18b0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18b0 7 x
+conv2d_bf16.h 1362 0x18b0 8 x
+conv2d_bf16.h 1429 0x18b0 9
+conv2d_bf16.h 1443 0x18b0 10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18be
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18be 1
+accum.hpp 946 0x18be 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18be 3 x
+conv2d_bf16.h 1364 0x18be 4 x
+conv2d_bf16.h 1443 0x18be 5
+conv2d_bf16.h 794 0x18ca x
+conv2d_bf16.h 795 0x18ca 1 x
+conv2d_bf16.h 1428 0x18ca 2 x
+conv2d_bf16.h 1443 0x18ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18d6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18d6 1
+accum.hpp 578 0x18d6 2
+accum.hpp 946 0x18d6 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18d6 4 x
+conv2d_bf16.h 799 0x18d6 5 x
+conv2d_bf16.h 1429 0x18d6 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e0 1 x
+accum.hpp 946 0x18e0 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x18e0 3 x
+conv2d_bf16.h 1367 0x18e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e6 1
+accum.hpp 946 0x18e6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x18e6 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x18e6 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18ec x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18ec 1 x
+accum.hpp 946 0x18ec 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18ec 3 x
+conv2d_bf16.h 1372 0x18ec 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f2 1
+accum.hpp 946 0x18f2 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18f2 3 x
+conv2d_bf16.h 1374 0x18f2 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f8 1 x
+accum.hpp 946 0x18f8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 795 0x18f8 3 x
+conv2d_bf16.h 1377 0x18f8 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18fe
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18fe 1
+accum.hpp 946 0x18fe 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18fe 3 x
+conv2d_bf16.h 1379 0x18fe 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1904 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1904 1 x
+accum.hpp 946 0x1904 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x1904 3 x
+conv2d_bf16.h 1429 0x1904 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x190a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x190a 1
+accum.hpp 946 0x190a 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x190a 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x190a 4 x
+conv2d_bf16.h 1429 0x190a 5
+conv2d_bf16.h 792 0x1914 x
+conv2d_bf16.h 794 0x1914 1 x
+conv2d_bf16.h 802 0x1914 2 x
+conv2d_bf16.h 793 0x191e x
+conv2d_bf16.h 799 0x191e 1 x
+conv2d_bf16.h 803 0x191e 2 x
+conv2d_bf16.h 807 0x191e 3 x
+conv2d_bf16.h 794 0x192a x
+conv2d_bf16.h 804 0x192a 1 x
+conv2d_bf16.h 808 0x192a 2 x
+conv2d_bf16.h 809 0x1934 x
+conv2d_bf16.h 810 0x1938 x
+conv2d_bf16.h 795 0x193c x
+conv2d_bf16.h 802 0x193c 1 x
+conv2d_bf16.h 1437 0x193c 2 x
+conv2d_bf16.h 796 0x1946 x
+conv2d_bf16.h 1436 0x1946 1 x
+conv2d_bf16.h 797 0x194e x
+conv2d_bf16.h 1438 0x194e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1956 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1956 1 x
+conv2d_bf16.h 1439 0x1956 2 x
+conv2d_bf16.h 792 0x1960 x
+conv2d_bf16.h 801 0x1960 1 x
+conv2d_bf16.h 793 0x1966 x
+conv2d_bf16.h 804 0x1966 1 x
+conv2d_bf16.h 808 0x1966 2 x
+conv2d_bf16.h 795 0x1970 x
+conv2d_bf16.h 803 0x1970 1 x
+conv2d_bf16.h 807 0x1970 2 x
+conv2d_bf16.h 796 0x197a x
+conv2d_bf16.h 810 0x197a 1 x
+conv2d_bf16.h 794 0x1982 x
+conv2d_bf16.h 797 0x1982 1 x
+conv2d_bf16.h 809 0x1982 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1990 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1990 1 x
+conv2d_bf16.h 802 0x1990 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19a0 1
+vector.hpp 1152 0x19a0 2
+vector.hpp 1152 0x19a0 3
+vector.hpp 1152 0x19a0 4
+vector.hpp 1152 0x19a0 5
+vector.hpp 1152 0x19a0 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x19a0 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19ac
+vector.hpp 1152 0x19ac 1
+vector.hpp 1152 0x19ac 2
+vector.hpp 1152 0x19ac 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 804 0x19ac 4 x
+conv2d_bf16.h 808 0x19ac 5 x
+conv2d_bf16.h 1517 0x19ac 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19b8
+vector.hpp 1152 0x19b8 1
+vector.hpp 1152 0x19b8 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 803 0x19b8 3 x
+conv2d_bf16.h 807 0x19b8 4 x
+conv2d_bf16.h 1518 0x19b8 5 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 810 0x19c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19cc x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 809 0x19cc 1 x
+conv2d_bf16.h 1428 0x19cc 2
+conv2d_bf16.h 801 0x19d6 x
+conv2d_bf16.h 802 0x19da x
+conv2d_bf16.h 803 0x19de x
+conv2d_bf16.h 807 0x19de 1 x
+conv2d_bf16.h 804 0x19e6 x
+conv2d_bf16.h 808 0x19e6 1 x
+conv2d_bf16.h 809 0x19ee x
+conv2d_bf16.h 810 0x19f2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x19fa x
+accum.hpp 1110 0x19fa 1 x
+accum.hpp 149 0x19fe
+accum.hpp 1110 0x19fe 1
+accum.hpp 149 0x1a02
+accum.hpp 1110 0x1a02 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1455 0x1a02 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1a0c x
+accum.hpp 1110 0x1a0c 1 x
+accum.hpp 149 0x1a10
+accum.hpp 1110 0x1a10 1
+accum.hpp 149 0x1a14
+accum.hpp 1110 0x1a14 1
+accum.hpp 149 0x1a18
+accum.hpp 1110 0x1a18 1
+accum.hpp 149 0x1a1c
+accum.hpp 1110 0x1a1c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a20 x
+max_min.hpp 20 0x1a24
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a28 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a28 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a30
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a30 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a38 x
+vector.hpp 1152 0x1a42
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a42 1 x
+max_min.hpp 20 0x1a4a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a4e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a4e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a56
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a56 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a60 x
+vector.hpp 1152 0x1a70
+vector.hpp 1152 0x1a74
+vector.hpp 1152 0x1a78
+vector.hpp 1152 0x1a7c
+vector.hpp 1152 0x1a80
+vector.hpp 1152 0x1a84
+vector.hpp 1152 0x1a88
+vector.hpp 1152 0x1a90
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1337 0x1a90 1 x
+conv2d_bf16.h 1873 0x1ac8 x
+conv2d_bf16.h 1873 0x1acc
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 74 0x1ae0 x
+superkernels.cpp 79 0x1ae0 1
+superkernels.cpp 81 0x1ae0 2
+superkernels.cpp 79 0x1aea x
+superkernels.cpp 81 0x1aea 1
+superkernels.cpp 74 0x1af4
+superkernels.cpp 79 0x1b06
+superkernels.cpp 79 0x1b06 1
+superkernels.cpp 81 0x1b1c
+superkernels.cpp 113 0x1b22
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b22 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b2c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b2c 1
+tile.hpp 86 0x1b2c 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b3c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b44
+tile.hpp 74 0x1b48
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1b4c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b4c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b54
+superkernels.cpp 81 0x1b60
+superkernels.cpp 87 0x1b64
+superkernels.cpp 87 0x1b64 1 x
+superkernels.cpp 88 0x1b6e x
+superkernels.cpp 89 0x1b6e 1
+superkernels.cpp 88 0x1b78
+superkernels.cpp 88 0x1b7e
+superkernels.cpp 87 0x1b86 x
+superkernels.cpp 113 0x1b86 1
+superkernels.cpp 88 0x1b8e x
+superkernels.cpp 88 0x1b94
+superkernels.cpp 89 0x1b9a x
+superkernels.cpp 89 0x1ba0
+superkernels.cpp 113 0x1ba0 1
+superkernels.cpp 106 0x1bb0
+superkernels.cpp 113 0x1bb0 1
+superkernels.cpp 117 0x1bb0 2
+superkernels.cpp 136 0x1bb0 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bb0 4
+io_buffer_main.h 324 0x1bb0 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 106 0x1bba x
+superkernels.cpp 108 0x1bba 1
+superkernels.cpp 107 0x1bc4
+superkernels.cpp 108 0x1bc4 1 x
+superkernels.cpp 139 0x1bc4 2
+superkernels.cpp 140 0x1bc4 3
+superkernels.cpp 107 0x1bce x
+superkernels.cpp 110 0x1bda x
+superkernels.cpp 110 0x1bda 1 x
+superkernels.cpp 108 0x1be0 x
+superkernels.cpp 107 0x1be4 x
+superkernels.cpp 108 0x1be4 1
+superkernels.cpp 106 0x1bea x
+superkernels.cpp 106 0x1bee
+superkernels.cpp 107 0x1bf2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bf6 x
+io_buffer_main.h 218 0x1bfa
+io_buffer_main.h 218 0x1bfe
+io_buffer_main.h 218 0x1c02
+io_buffer_main.h 235 0x1c08 x
+io_buffer_main.h 218 0x1c14 x
+io_buffer_main.h 218 0x1c14 1 x
+io_buffer_main.h 218 0x1c18
+io_buffer_main.h 395 0x1c1c
+io_buffer_main.h 395 0x1c26 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1c30 x
+superkernels.cpp 113 0x1c36
+superkernels.cpp 113 0x1c42
+superkernels.cpp 117 0x1c50 x
+superkernels.cpp 117 0x1c50 1
+superkernels.cpp 117 0x1c5a
+superkernels.cpp 117 0x1c6c
+superkernels.cpp 117 0x1c70
+superkernels.cpp 136 0x1c76
+superkernels.cpp 140 0x1c76 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x1c82 x
+io_buffer_main.h 327 0x1c82 1
+io_buffer_main.h 425 0x1c82 2
+io_buffer_main.h 324 0x1c88
+io_buffer_main.h 425 0x1c98 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 136 0x1c9c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x1c9c 1 x
+io_buffer_main.h 327 0x1cae
+io_buffer_main.h 327 0x1cb2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 139 0x1cc0 x
+superkernels.cpp 139 0x1cc0 1
+superkernels.cpp 139 0x1cca
+superkernels.cpp 142 0x1cd2
+superkernels.cpp 139 0x1cde
+superkernels.cpp 139 0x1ce2
+superkernels.cpp 140 0x1cf4 x
+superkernels.cpp 142 0x1d04 x
+superkernels.cpp 142 0x1d08
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x1d20 x
+elementwise_binary.h 142 0x1d20 1
+elementwise_binary.h 144 0x1d20 2 x
+elementwise_binary.h 141 0x1d26
+elementwise_binary.h 141 0x1d2a
+elementwise_binary.h 142 0x1d2e x
+elementwise_binary.h 142 0x1d32
+elementwise_binary.h 130 0x1d40 x
+elementwise_binary.h 133 0x1d40 1 x
+elementwise_binary.h 130 0x1d44
+elementwise_binary.h 133 0x1d58 x
+elementwise_binary.h 134 0x1d5c x
+elementwise_binary.h 134 0x1d6c
+elementwise_binary.h 135 0x1d70 x
+elementwise_binary.h 135 0x1d80
+elementwise_binary.h 136 0x1d84 x
+elementwise_binary.h 137 0x1d8c x
+elementwise_binary.h 136 0x1d98 x
+elementwise_binary.h 137 0x1d9c
+elementwise_binary.h 137 0x1da0
+elementwise_binary.h 139 0x1da0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 146 0x1da0 2
+add_impl.h 146 0x1daa
+add_impl.h 147 0x1daa 1
+add_impl.h 147 0x1daa 2
+add_impl.h 146 0x1db4 x
+add_impl.h 147 0x1db4 1
+add_impl.h 147 0x1dbe x
+add_impl.h 147 0x1dc6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dca x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dce
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dd2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dd8 x
+add_impl.h 147 0x1ddc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 81 0x1df0
+elementwise_binary_broadcasting.h 81 0x1df0 1 x
+elementwise_binary_broadcasting.h 82 0x1df0 2
+elementwise_binary_broadcasting.h 82 0x1df0 3
+elementwise_binary_broadcasting.h 83 0x1df0 4
+elementwise_binary_broadcasting.h 81 0x1dfa
+elementwise_binary_broadcasting.h 82 0x1dfa 1
+elementwise_binary_broadcasting.h 82 0x1e00 x
+elementwise_binary_broadcasting.h 84 0x1e0e x
+elementwise_binary_broadcasting.h 82 0x1e12 x
+elementwise_binary_broadcasting.h 83 0x1e16 x
+elementwise_binary_broadcasting.h 82 0x1e1a x
+elementwise_binary_broadcasting.h 83 0x1e1a 1
+elementwise_binary_broadcasting.h 82 0x1e20
+elementwise_binary_broadcasting.h 82 0x1e24
+elementwise_binary_broadcasting.h 76 0x1e30
+elementwise_binary_broadcasting.h 76 0x1e30 1 x
+elementwise_binary_broadcasting.h 77 0x1e3a x
+elementwise_binary_broadcasting.h 78 0x1e44
+elementwise_binary_broadcasting.h 78 0x1e54
+elementwise_binary_broadcasting.h 78 0x1e58 x
+elementwise_binary_broadcasting.h 78 0x1e5e
+elementwise_binary_broadcasting.h 79 0x1e62 x
+elementwise_binary_broadcasting.h 89 0x1e70 x
+elementwise_binary_broadcasting.h 96 0x1e70 1 x
+elementwise_binary_broadcasting.h 102 0x1e70 2
+elementwise_binary_broadcasting.h 102 0x1e76 x
+elementwise_binary_broadcasting.h 117 0x1e76 1
+elementwise_binary_broadcasting.h 102 0x1e88
+elementwise_binary_broadcasting.h 102 0x1e88 1
+elementwise_binary_broadcasting.h 96 0x1e8e
+elementwise_binary_broadcasting.h 96 0x1e92 x
+elementwise_binary_broadcasting.h 103 0x1e9c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1eb0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1eb6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 106 0x1ec0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1ed0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1ed6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1ee0
+add_accum.hpp 19 0x1ee0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 117 0x1ee0 2 x
+elementwise_binary_broadcasting.h 117 0x1ee0 3 x
+elementwise_binary_broadcasting.h 117 0x1eea
+elementwise_binary_broadcasting.h 117 0x1eea 1
+elementwise_binary_broadcasting.h 117 0x1ef4
+elementwise_binary_broadcasting.h 117 0x1efa
+elementwise_binary_broadcasting.h 117 0x1f00
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f08 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f08 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f08 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f0c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f0c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f0c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f10 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f10 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f14
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f14 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f14 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f18 x
+vector.hpp 1159 0x1f18 1
+vector.hpp 1159 0x1f18 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f18 3 x
+accum.hpp 1110 0x1f18 4
+accum.hpp 1110 0x1f18 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f18 6 x
+elementwise_binary.h 195 0x1f18 7
+elementwise_binary.h 218 0x1f18 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f1e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f1e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f1e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f1e 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f26 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f26 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f26 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f2a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f2a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f2a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f32 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f32 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f32 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f36
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f36 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f36 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f36 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f3e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f42
+vector.hpp 1159 0x1f42 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f42 2
+accum.hpp 1110 0x1f42 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f42 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f42 5 x
+elementwise_binary.h 218 0x1f42 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f50 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f50 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f50 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f54
+vector.hpp 1159 0x1f54 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f54 2
+accum.hpp 1110 0x1f54 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f54 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f54 5 x
+elementwise_binary.h 195 0x1f54 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f60 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f60 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f60 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f70
+vector.hpp 1159 0x1f70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f70 2
+accum.hpp 1110 0x1f70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f70 5 x
+elementwise_binary.h 218 0x1f70 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f82
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f82 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f82 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f82 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f8c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f8c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f8c 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f8c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f96
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f96 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f96 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 121 0x1f96 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f9e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f9e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f9e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1fa4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1fa4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1fa4 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 82 0x1fb0
+ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x
+ise_binary_attribute_broadcasting.h 90 0x1fb6
+ise_binary_attribute_broadcasting.h 90 0x1fbe x
+ise_binary_attribute_broadcasting.h 117 0x1fbe 1
+ise_binary_attribute_broadcasting.h 92 0x1fc6 x
+ise_binary_attribute_broadcasting.h 92 0x1fc6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x1fd6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x1fd6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 117 0x1fe2 x
+ise_binary_attribute_broadcasting.h 92 0x1fe8
+ise_binary_attribute_broadcasting.h 92 0x1fee x
+ise_binary_attribute_broadcasting.h 92 0x1ff2
+ise_binary_attribute_broadcasting.h 117 0x1ff2 1
+ise_binary_attribute_broadcasting.h 117 0x1ff8
+ise_binary_attribute_broadcasting.h 118 0x2000
+ise_binary_attribute_broadcasting.h 118 0x2010 x
+ise_binary_attribute_broadcasting.h 118 0x2014
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 147 0x2030 x
+superkernels.cpp 152 0x2030 1
+superkernels.cpp 152 0x2036 x
+superkernels.cpp 147 0x203c
+superkernels.cpp 149 0x204a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2054
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 152 0x205c
+superkernels.cpp 152 0x205c 1
+superkernels.cpp 149 0x2062 x
+superkernels.cpp 149 0x2066
+superkernels.cpp 149 0x206e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x206e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x2076
+superkernels.cpp 166 0x2076 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x207c
+tile.hpp 74 0x2082
+tile.hpp 86 0x2082 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x208e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2098
+tile.hpp 74 0x209c
+tile.hpp 74 0x20a0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 159 0x20b0
+superkernels.cpp 159 0x20b6 x
+superkernels.cpp 159 0x20b6 1
+superkernels.cpp 157 0x20c0
+superkernels.cpp 159 0x20c0 1
+superkernels.cpp 166 0x20c0 2
+superkernels.cpp 157 0x20ca x
+superkernels.cpp 159 0x20ca 1
+superkernels.cpp 164 0x20ca 2
+superkernels.cpp 157 0x20de
+superkernels.cpp 159 0x20e6 x
+superkernels.cpp 157 0x20ea x
+superkernels.cpp 159 0x20f0 x
+superkernels.cpp 164 0x2100
+superkernels.cpp 166 0x2100 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2110 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 163 0x2118
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2118 1
+io_buffer_main.h 218 0x2122
+io_buffer_main.h 218 0x2126
+io_buffer_main.h 235 0x212a x
+io_buffer_main.h 218 0x2138 x
+io_buffer_main.h 218 0x2138 1 x
+io_buffer_main.h 218 0x213c
+io_buffer_main.h 395 0x2140
+io_buffer_main.h 395 0x214a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x214e
+superkernels.cpp 163 0x2158 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x215c x
+io_buffer_main.h 324 0x215c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x2162 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2166 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 166 0x216c x
+superkernels.cpp 163 0x2174 x
+superkernels.cpp 163 0x2178
+superkernels.cpp 164 0x217c x
+superkernels.cpp 164 0x2180
+superkernels.cpp 168 0x2190
+superkernels.cpp 169 0x2190 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2190 2 x
+io_buffer_main.h 327 0x219a
+io_buffer_main.h 425 0x219a 1
+io_buffer_main.h 425 0x21a8 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21ac 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21b6 x
+superkernels.cpp 168 0x21ba
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21c6 x
+io_buffer_main.h 327 0x21ca
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ce x
+superkernels.cpp 168 0x21d2
+superkernels.cpp 169 0x21e2
+superkernels.cpp 169 0x21e6 x
+superkernels.cpp 171 0x21f0
+superkernels.cpp 171 0x2204 x
+superkernels.cpp 171 0x220c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 124 0x2220 x
+elementwise_unary.h 126 0x2220 1 x
+elementwise_unary.h 126 0x2230 x
+elementwise_unary.h 127 0x2234 x
+elementwise_unary.h 127 0x2244
+elementwise_unary.h 128 0x2248 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x224c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 128 0x225a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x225e x
+clip_impl.h 114 0x226e x
+clip_impl.h 114 0x2272
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 130 0x2276 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2290
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 136 0x2290 1 x
+elementwise_unary.h 142 0x2290 2
+elementwise_unary.h 154 0x2290 3 x
+elementwise_unary.h 171 0x2290 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x229c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x229c 1
+elementwise_unary.h 154 0x229c 2 x
+elementwise_unary.h 190 0x229c 3 x
+elementwise_unary.h 136 0x22a8
+elementwise_unary.h 136 0x22ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 103 0x22b0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22b4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22b8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x22b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 104 0x22b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22cc 2 x
+elementwise_unary.h 171 0x22cc 3 x
+elementwise_unary.h 154 0x22d6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22de x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22e2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22e2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x22f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22f0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x22f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2300 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2310 x
+vector.hpp 1159 0x2310 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2310 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x2310 3 x
+elementwise_unary.h 176 0x2310 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2320
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2320 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x2320 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2330 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2330 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x2330 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2340 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2350 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2350 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x2350 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2358 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x235c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x235c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x235c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2364 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 158 0x2364 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x236a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x236a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x236a 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2372 x
+max_min.hpp 21 0x2376 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x237a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x237e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 176 0x2390 x
+superkernels.cpp 181 0x2390 1
+superkernels.cpp 181 0x2396 x
+superkernels.cpp 176 0x239c
+superkernels.cpp 178 0x23aa
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x23b4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 181 0x23bc
+superkernels.cpp 181 0x23bc 1
+superkernels.cpp 178 0x23c2 x
+superkernels.cpp 178 0x23c6
+superkernels.cpp 178 0x23ce
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x23ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23d6
+superkernels.cpp 195 0x23d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23dc
+tile.hpp 74 0x23e2
+tile.hpp 86 0x23e2 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23ee x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23f8
+tile.hpp 74 0x23fc
+tile.hpp 74 0x2400 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 188 0x2410
+superkernels.cpp 188 0x2416 x
+superkernels.cpp 188 0x2416 1
+superkernels.cpp 186 0x2420
+superkernels.cpp 188 0x2420 1
+superkernels.cpp 195 0x2420 2
+superkernels.cpp 186 0x242a x
+superkernels.cpp 188 0x242a 1
+superkernels.cpp 193 0x242a 2
+superkernels.cpp 186 0x243e
+superkernels.cpp 188 0x2446 x
+superkernels.cpp 186 0x244a x
+superkernels.cpp 188 0x2450 x
+superkernels.cpp 193 0x2460
+superkernels.cpp 195 0x2460 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2470 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 192 0x2478
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2478 1
+io_buffer_main.h 218 0x2482
+io_buffer_main.h 218 0x2486
+io_buffer_main.h 235 0x248a x
+io_buffer_main.h 218 0x2498 x
+io_buffer_main.h 218 0x2498 1 x
+io_buffer_main.h 218 0x249c
+io_buffer_main.h 395 0x24a0
+io_buffer_main.h 395 0x24aa x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24ae
+superkernels.cpp 192 0x24b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24bc x
+io_buffer_main.h 324 0x24bc 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24c2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24c6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 195 0x24cc x
+superkernels.cpp 192 0x24d4 x
+superkernels.cpp 192 0x24d8
+superkernels.cpp 193 0x24dc x
+superkernels.cpp 193 0x24e0
+superkernels.cpp 197 0x24f0
+superkernels.cpp 198 0x24f0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x24f0 2 x
+io_buffer_main.h 327 0x24fa
+io_buffer_main.h 425 0x24fa 1
+io_buffer_main.h 425 0x2508 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x250c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x250c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x2516 x
+superkernels.cpp 197 0x251a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2526 x
+io_buffer_main.h 327 0x252a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x252e x
+superkernels.cpp 197 0x2532
+superkernels.cpp 198 0x2542
+superkernels.cpp 198 0x2546 x
+superkernels.cpp 200 0x2550
+superkernels.cpp 200 0x2564 x
+superkernels.cpp 200 0x256c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 219 0x2600
+elementwise_binary_shared.h 219 0x2600 1 x
+elementwise_binary_shared.h 220 0x260a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2614
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2620
+elementwise_binary_shared.h 222 0x2632 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x263c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2640
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2640 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x2870
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 237 0x2870 1 x
+elementwise_binary_shared.h 244 0x2870 2
+elementwise_binary_shared.h 245 0x2870 3
+elementwise_binary_shared.h 247 0x2870 4
+elementwise_binary_shared.h 250 0x2870 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x287a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 244 0x287a 1 x
+elementwise_binary_shared.h 245 0x287a 2
+elementwise_binary_shared.h 247 0x287a 3
+elementwise_binary_shared.h 244 0x288c
+elementwise_binary_shared.h 244 0x288c 1
+elementwise_binary_shared.h 237 0x2892
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x28a0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x28a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 247 0x28a6 x
+elementwise_binary_shared.h 245 0x28d0 x
+elementwise_binary_shared.h 245 0x28d6
+elementwise_binary_shared.h 245 0x28d6 1
+elementwise_binary_shared.h 250 0x28f0
+elementwise_binary_shared.h 250 0x28f4 x
+elementwise_binary_shared.h 250 0x28f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 205 0x2910 x
+superkernels.cpp 210 0x2910 1
+superkernels.cpp 210 0x2916 x
+superkernels.cpp 205 0x291c
+superkernels.cpp 207 0x292a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2934
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 210 0x293c
+superkernels.cpp 210 0x293c 1
+superkernels.cpp 207 0x2942 x
+superkernels.cpp 207 0x2946
+superkernels.cpp 207 0x294e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x294e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x2956
+superkernels.cpp 224 0x2956 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x295c
+tile.hpp 74 0x2962
+tile.hpp 86 0x2962 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x296e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2978
+tile.hpp 74 0x297c
+tile.hpp 74 0x2980 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 217 0x2990
+superkernels.cpp 217 0x2996 x
+superkernels.cpp 217 0x2996 1
+superkernels.cpp 215 0x29a0
+superkernels.cpp 217 0x29a0 1
+superkernels.cpp 224 0x29a0 2
+superkernels.cpp 215 0x29aa x
+superkernels.cpp 217 0x29aa 1
+superkernels.cpp 222 0x29aa 2
+superkernels.cpp 215 0x29be
+superkernels.cpp 217 0x29c6 x
+superkernels.cpp 215 0x29ca x
+superkernels.cpp 217 0x29d0 x
+superkernels.cpp 222 0x29e0
+superkernels.cpp 224 0x29e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 221 0x29f8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f8 1
+io_buffer_main.h 218 0x2a02
+io_buffer_main.h 218 0x2a06
+io_buffer_main.h 235 0x2a0a x
+io_buffer_main.h 218 0x2a18 x
+io_buffer_main.h 218 0x2a18 1 x
+io_buffer_main.h 218 0x2a1c
+io_buffer_main.h 395 0x2a20
+io_buffer_main.h 395 0x2a2a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a2e
+superkernels.cpp 221 0x2a38 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a3c x
+io_buffer_main.h 324 0x2a3c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a42 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a46 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 224 0x2a4c x
+superkernels.cpp 221 0x2a54 x
+superkernels.cpp 221 0x2a58
+superkernels.cpp 222 0x2a5c x
+superkernels.cpp 222 0x2a60
+superkernels.cpp 226 0x2a70
+superkernels.cpp 227 0x2a70 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2a70 2 x
+io_buffer_main.h 327 0x2a7a
+io_buffer_main.h 425 0x2a7a 1
+io_buffer_main.h 425 0x2a88 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a8c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2a8c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a96 x
+superkernels.cpp 226 0x2a9a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2aa6 x
+io_buffer_main.h 327 0x2aaa
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2aae x
+superkernels.cpp 226 0x2ab2
+superkernels.cpp 227 0x2ac2
+superkernels.cpp 227 0x2ac6 x
+superkernels.cpp 229 0x2ad0
+superkernels.cpp 229 0x2ae4 x
+superkernels.cpp 229 0x2aec
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 219 0x2b80
+elementwise_binary_shared.h 219 0x2b80 1 x
+elementwise_binary_shared.h 220 0x2b8a x
+elementwise_binary_shared.h 220 0x2b98
+elementwise_binary_shared.h 220 0x2ba0
+elementwise_binary_shared.h 222 0x2ba0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 146 0x2ba0 2
+add_impl.h 146 0x2baa
+add_impl.h 147 0x2baa 1
+add_impl.h 147 0x2baa 2
+add_impl.h 146 0x2bb4 x
+add_impl.h 147 0x2bb4 1
+add_impl.h 147 0x2bbe x
+add_impl.h 147 0x2bc6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2bca x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x2bce
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2bd2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x2bd8 x
+add_impl.h 147 0x2bdc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 227 0x2bf0 x
+elementwise_binary_shared.h 232 0x2bf0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x2c00 x
+elementwise_binary.h 142 0x2c00 1
+elementwise_binary.h 144 0x2c00 2 x
+elementwise_binary.h 141 0x2c06
+elementwise_binary.h 141 0x2c0a
+elementwise_binary.h 142 0x2c0e x
+elementwise_binary.h 142 0x2c12
+elementwise_binary.h 130 0x2c20 x
+elementwise_binary.h 133 0x2c20 1 x
+elementwise_binary.h 130 0x2c24
+elementwise_binary.h 133 0x2c36 x
+elementwise_binary.h 134 0x2c3a x
+elementwise_binary.h 134 0x2c4a
+elementwise_binary.h 135 0x2c4e x
+elementwise_binary.h 135 0x2c5e
+elementwise_binary.h 136 0x2c62 x
+elementwise_binary.h 137 0x2c6a x
+elementwise_binary.h 136 0x2c78 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2c7c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2c80
+elementwise_binary.h 139 0x2c92 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2c9c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2ca0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2ca0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 149 0x2cb0 x
+elementwise_binary.h 156 0x2cb0 1
+elementwise_binary.h 168 0x2cb0 2 x
+elementwise_binary.h 156 0x2cba x
+elementwise_binary.h 168 0x2cba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2cc4
+mul_acc32_fp.hpp 36 0x2cc4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 156 0x2cc4 2
+elementwise_binary.h 156 0x2cc4 3
+elementwise_binary.h 156 0x2cce
+elementwise_binary.h 156 0x2cce 1
+elementwise_binary.h 156 0x2cd8
+elementwise_binary.h 156 0x2ce2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2ce6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 168 0x2ce6 1
+elementwise_binary.h 187 0x2ce6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cec
+vector.hpp 1139 0x2cec 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2cec 2 x
+elementwise_binary.h 211 0x2cec 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cf2 x
+vector.hpp 1139 0x2cf2 1 x
+vector.hpp 1159 0x2cf2 2
+vector.hpp 1159 0x2cf2 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cf2 4
+accum.hpp 1110 0x2cf2 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2cf2 6 x
+elementwise_binary.h 195 0x2cf2 7
+elementwise_binary.h 213 0x2cf2 8 x
+elementwise_binary.h 218 0x2cf2 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cfa
+vector.hpp 1139 0x2cfa 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2cfa 2 x
+elementwise_binary.h 211 0x2cfa 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d00 x
+vector.hpp 1139 0x2d00 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d00 2 x
+elementwise_binary.h 213 0x2d00 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d06
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2d06 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d0a x
+vector.hpp 1139 0x2d0a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d0a 2 x
+elementwise_binary.h 213 0x2d0a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d10
+vector.hpp 1139 0x2d10 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d10 2 x
+elementwise_binary.h 189 0x2d10 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d16 x
+vector.hpp 1139 0x2d16 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d16 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d16 3 x
+elementwise_binary.h 213 0x2d16 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d20
+vector.hpp 1139 0x2d20 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d20 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d20 3 x
+elementwise_binary.h 189 0x2d20 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d2a x
+vector.hpp 1139 0x2d2a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d2a 3 x
+elementwise_binary.h 213 0x2d2a 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d34
+vector.hpp 1139 0x2d34 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d34 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d34 3 x
+elementwise_binary.h 189 0x2d34 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d3e x
+vector.hpp 1139 0x2d3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d3e 3 x
+elementwise_binary.h 213 0x2d3e 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d48
+vector.hpp 1139 0x2d48 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d48 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d48 3 x
+elementwise_binary.h 189 0x2d48 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d52 x
+vector.hpp 1139 0x2d52 1 x
+vector.hpp 1159 0x2d52 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d52 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d52 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d52 5 x
+elementwise_binary.h 213 0x2d52 6 x
+elementwise_binary.h 218 0x2d52 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d60
+vector.hpp 1139 0x2d60 1
+vector.hpp 1159 0x2d60 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d60 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d60 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d60 5 x
+elementwise_binary.h 189 0x2d60 6 x
+elementwise_binary.h 195 0x2d60 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d70 x
+vector.hpp 1139 0x2d70 1 x
+vector.hpp 1159 0x2d70 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d70 5 x
+elementwise_binary.h 213 0x2d70 6 x
+elementwise_binary.h 218 0x2d70 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d80
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d80 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d80 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2d80 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d88 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d88 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d88 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2d88 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d90
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d90 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d90 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2d90 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d98 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d98 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d98 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2d98 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2da0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2da0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2da0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2da0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2da8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2da8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2da8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2da8 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2db0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2db0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2db0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2db0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2db8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2db8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2db8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dbc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dbc 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 172 0x2dbc 2 x
+elementwise_binary.h 195 0x2dbc 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dc2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dc2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2dc2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dc6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dc6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2dc6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dca x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2dca 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dce
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dce 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2dce 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 369 0x2de0 x
+superkernels.cpp 374 0x2de0 1
+superkernels.cpp 374 0x2de6 x
+superkernels.cpp 369 0x2dec
+superkernels.cpp 371 0x2df2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2df2 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 374 0x2e0e x
+superkernels.cpp 374 0x2e0e 1 x
+superkernels.cpp 371 0x2e14 x
+superkernels.cpp 371 0x2e18
+superkernels.cpp 371 0x2e1e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2e26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2e2a
+superkernels.cpp 379 0x2e2a 1
+superkernels.cpp 381 0x2e2a 2
+superkernels.cpp 393 0x2e2a 3
+superkernels.cpp 377 0x2e34
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2e34 1
+tile.hpp 74 0x2e3e
+tile.hpp 86 0x2e3e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2e4a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2e54
+tile.hpp 74 0x2e58
+tile.hpp 74 0x2e5c x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 381 0x2e60
+superkernels.cpp 381 0x2e60 1 x
+superkernels.cpp 381 0x2e6a
+superkernels.cpp 381 0x2e6a 1
+superkernels.cpp 390 0x2e6a 2
+superkernels.cpp 379 0x2e74 x
+superkernels.cpp 382 0x2e74 1
+superkernels.cpp 391 0x2e74 2
+superkernels.cpp 379 0x2e8a
+superkernels.cpp 381 0x2e90 x
+superkernels.cpp 379 0x2e94 x
+superkernels.cpp 381 0x2e98 x
+superkernels.cpp 382 0x2e9c x
+superkernels.cpp 390 0x2ea0
+superkernels.cpp 391 0x2ea6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eb0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2eb4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eb4 1
+io_buffer_main.h 218 0x2ebe
+io_buffer_main.h 218 0x2ec2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2ec6 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 235 0x2eca x
+io_buffer_main.h 218 0x2ed6 x
+io_buffer_main.h 218 0x2ed6 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2eda x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eda 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2ee0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 395 0x2ee4
+io_buffer_main.h 395 0x2ee4 1
+io_buffer_main.h 395 0x2eee x
+io_buffer_main.h 218 0x2ef2 x
+io_buffer_main.h 218 0x2efa
+io_buffer_main.h 218 0x2efe
+io_buffer_main.h 218 0x2f02
+io_buffer_main.h 235 0x2f06 x
+io_buffer_main.h 218 0x2f14 x
+io_buffer_main.h 218 0x2f14 1 x
+io_buffer_main.h 218 0x2f18
+io_buffer_main.h 395 0x2f24 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f28
+superkernels.cpp 391 0x2f28 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2f28 2
+io_buffer_main.h 125 0x2f36
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f3a x
+superkernels.cpp 391 0x2f40 x
+superkernels.cpp 393 0x2f40 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2f46 x
+io_buffer_main.h 125 0x2f4a
+io_buffer_main.h 327 0x2f4e
+io_buffer_main.h 327 0x2f4e 1
+io_buffer_main.h 125 0x2f54
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 393 0x2f5a x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2f60
+io_buffer_main.h 327 0x2f60 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f64 x
+superkernels.cpp 391 0x2f68 x
+superkernels.cpp 391 0x2f6c
+superkernels.cpp 390 0x2f70 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2f80 x
+io_buffer_main.h 327 0x2f80 1
+io_buffer_main.h 327 0x2f80 2
+io_buffer_main.h 327 0x2f80 3
+io_buffer_main.h 327 0x2f80 4
+io_buffer_main.h 425 0x2f80 5
+io_buffer_main.h 425 0x2f80 6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2f8a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 425 0x2f9a x
+io_buffer_main.h 327 0x2f9e x
+io_buffer_main.h 324 0x2fa2
+io_buffer_main.h 327 0x2fb0
+io_buffer_main.h 324 0x2fb4 x
+io_buffer_main.h 327 0x2fb4 1
+io_buffer_main.h 425 0x2fc6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fca
+superkernels.cpp 398 0x2fca 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2fca 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fd4 x
+superkernels.cpp 397 0x2fd8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2fe4 x
+io_buffer_main.h 327 0x2fe8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fec x
+superkernels.cpp 397 0x2ff0
+superkernels.cpp 398 0x3000
+superkernels.cpp 398 0x3004 x
+superkernels.cpp 400 0x3010
+superkernels.cpp 400 0x3026 x
+superkernels.cpp 400 0x302e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h:
+conv2d_dw_bf16_params.h 211 0x3040 x
+conv2d_dw_bf16_params.h 215 0x3040 1
+conv2d_dw_bf16_params.h 215 0x3040 2 x
+conv2d_dw_bf16_params.h 215 0x304a x
+conv2d_dw_bf16_params.h 218 0x304a 1
+conv2d_dw_bf16_params.h 218 0x304a 2
+conv2d_dw_bf16_params.h 211 0x3054
+conv2d_dw_bf16_params.h 218 0x305a
+conv2d_dw_bf16_params.h 215 0x306e
+conv2d_dw_bf16_params.h 215 0x3072
+conv2d_dw_bf16_params.h 215 0x3076
+conv2d_dw_bf16_params.h 215 0x307a
+conv2d_dw_bf16_params.h 215 0x3088
+conv2d_dw_bf16_params.h 215 0x308c
+conv2d_dw_bf16_params.h 218 0x3090 x
+conv2d_dw_bf16_params.h 218 0x3094
+conv2d_dw_bf16_params.h 218 0x3098
+conv2d_dw_bf16_params.h 218 0x30a4
+conv2d_dw_bf16_params.h 218 0x30aa
+conv2d_dw_bf16_params.h 218 0x30b0
+conv2d_dw_bf16_params.h 218 0x30b6
+conv2d_dw_bf16_params.h 218 0x30bc
+conv2d_dw_bf16_params.h 218 0x30c0
+conv2d_dw_bf16_params.h 218 0x30d0
+conv2d_dw_bf16_params.h 218 0x30d0 1
+conv2d_dw_bf16_params.h 219 0x30d0 2
+conv2d_dw_bf16_params.h 218 0x30d6
+conv2d_dw_bf16_params.h 219 0x30d6 1 x
+conv2d_dw_bf16_params.h 219 0x30dc
+conv2d_dw_bf16_params.h 219 0x30e0
+conv2d_dw_bf16_params.h 218 0x30ea x
+conv2d_dw_bf16_params.h 218 0x30ee
+conv2d_dw_bf16_params.h 219 0x30f2 x
+conv2d_dw_bf16_params.h 219 0x30f8
+conv2d_dw_bf16_params.h 218 0x3102 x
+conv2d_dw_bf16_params.h 219 0x3106 x
+conv2d_dw_bf16_params.h 219 0x310a
+conv2d_dw_bf16_params.h 218 0x310e x
+conv2d_dw_bf16_params.h 218 0x3112
+conv2d_dw_bf16_params.h 219 0x3112 1 x
+conv2d_dw_bf16_params.h 219 0x3120
+conv2d_dw_bf16_params.h 226 0x3120 1
+conv2d_dw_bf16_params.h 231 0x3120 2
+conv2d_dw_bf16_params.h 219 0x312a
+conv2d_dw_bf16_params.h 219 0x312a 1
+conv2d_dw_bf16_params.h 220 0x312a 2
+conv2d_dw_bf16_params.h 220 0x312a 3
+conv2d_dw_bf16_params.h 232 0x312a 4
+conv2d_dw_bf16_params.h 234 0x312a 5
+conv2d_dw_bf16_params.h 234 0x312a 6
+conv2d_dw_bf16_params.h 243 0x312a 7
+conv2d_dw_bf16_params.h 250 0x312a 8
+conv2d_dw_bf16_params.h 253 0x312a 9
+conv2d_dw_bf16_params.h 260 0x312a 10
+conv2d_dw_bf16_params.h 264 0x312a 11
+conv2d_dw_bf16_params.h 220 0x3134
+conv2d_dw_bf16_params.h 234 0x3134 1
+conv2d_dw_bf16_params.h 246 0x3134 2
+conv2d_dw_bf16_params.h 253 0x3134 3
+conv2d_dw_bf16_params.h 226 0x313e x
+conv2d_dw_bf16_params.h 234 0x313e 1
+conv2d_dw_bf16_params.h 234 0x313e 2
+conv2d_dw_bf16_params.h 231 0x3148
+conv2d_dw_bf16_params.h 232 0x3148 1
+conv2d_dw_bf16_params.h 232 0x3148 2
+conv2d_dw_bf16_params.h 235 0x3152
+conv2d_dw_bf16_params.h 235 0x3152 1
+conv2d_dw_bf16_params.h 242 0x3152 2
+conv2d_dw_bf16_params.h 242 0x3152 3
+conv2d_dw_bf16_params.h 243 0x3152 4
+conv2d_dw_bf16_params.h 250 0x3152 5
+conv2d_dw_bf16_params.h 255 0x3152 6
+conv2d_dw_bf16_params.h 260 0x3152 7
+conv2d_dw_bf16_params.h 264 0x3152 8
+conv2d_dw_bf16_params.h 234 0x315c
+conv2d_dw_bf16_params.h 239 0x315c 1
+conv2d_dw_bf16_params.h 242 0x315c 2
+conv2d_dw_bf16_params.h 248 0x315c 3
+conv2d_dw_bf16_params.h 253 0x315c 4
+conv2d_dw_bf16_params.h 264 0x315c 5
+conv2d_dw_bf16_params.h 219 0x3166 x
+conv2d_dw_bf16_params.h 219 0x316a
+conv2d_dw_bf16_params.h 219 0x316e
+conv2d_dw_bf16_params.h 220 0x316e 1
+conv2d_dw_bf16_params.h 219 0x3174
+conv2d_dw_bf16_params.h 243 0x3174 1
+conv2d_dw_bf16_params.h 247 0x3174 2
+conv2d_dw_bf16_params.h 220 0x317a x
+conv2d_dw_bf16_params.h 250 0x317a 1
+conv2d_dw_bf16_params.h 219 0x3180 x
+conv2d_dw_bf16_params.h 220 0x3184 x
+conv2d_dw_bf16_params.h 231 0x3184 1
+conv2d_dw_bf16_params.h 219 0x318a x
+conv2d_dw_bf16_params.h 231 0x318a 1 x
+conv2d_dw_bf16_params.h 220 0x3190 x
+conv2d_dw_bf16_params.h 253 0x3190 1 x
+conv2d_dw_bf16_params.h 240 0x3196
+conv2d_dw_bf16_params.h 246 0x3196 1 x
+conv2d_dw_bf16_params.h 232 0x319c x
+conv2d_dw_bf16_params.h 226 0x31a0 x
+conv2d_dw_bf16_params.h 231 0x31a4 x
+conv2d_dw_bf16_params.h 238 0x31a4 1
+conv2d_dw_bf16_params.h 234 0x31aa x
+conv2d_dw_bf16_params.h 231 0x31ae x
+conv2d_dw_bf16_params.h 232 0x31ae 1 x
+conv2d_dw_bf16_params.h 234 0x31b4 x
+conv2d_dw_bf16_params.h 232 0x31b8 x
+conv2d_dw_bf16_params.h 227 0x31bc x
+conv2d_dw_bf16_params.h 232 0x31bc 1
+conv2d_dw_bf16_params.h 234 0x31c2 x
+conv2d_dw_bf16_params.h 235 0x31c2 1 x
+conv2d_dw_bf16_params.h 235 0x31c8
+conv2d_dw_bf16_params.h 243 0x31c8 1 x
+conv2d_dw_bf16_params.h 238 0x31ce x
+conv2d_dw_bf16_params.h 242 0x31ce 1 x
+conv2d_dw_bf16_params.h 242 0x31d4
+conv2d_dw_bf16_params.h 243 0x31d4 1 x
+conv2d_dw_bf16_params.h 239 0x31da x
+conv2d_dw_bf16_params.h 242 0x31da 1 x
+conv2d_dw_bf16_params.h 243 0x31e0 x
+conv2d_dw_bf16_params.h 250 0x31e0 1 x
+conv2d_dw_bf16_params.h 234 0x31e6 x
+conv2d_dw_bf16_params.h 240 0x31e6 1 x
+conv2d_dw_bf16_params.h 253 0x31e6 2 x
+conv2d_dw_bf16_params.h 247 0x31ec x
+conv2d_dw_bf16_params.h 242 0x31f0 x
+conv2d_dw_bf16_params.h 247 0x31f0 1
+conv2d_dw_bf16_params.h 241 0x31f6 x
+conv2d_dw_bf16_params.h 243 0x31f6 1 x
+conv2d_dw_bf16_params.h 243 0x31fc
+conv2d_dw_bf16_params.h 245 0x31fc 1 x
+conv2d_dw_bf16_params.h 243 0x3202 x
+conv2d_dw_bf16_params.h 248 0x3202 1 x
+conv2d_dw_bf16_params.h 245 0x3208 x
+conv2d_dw_bf16_params.h 250 0x3208 1 x
+conv2d_dw_bf16_params.h 246 0x320e x
+conv2d_dw_bf16_params.h 250 0x320e 1
+conv2d_dw_bf16_params.h 247 0x3214 x
+conv2d_dw_bf16_params.h 248 0x3214 1 x
+conv2d_dw_bf16_params.h 250 0x321a x
+conv2d_dw_bf16_params.h 250 0x321a 1 x
+conv2d_dw_bf16_params.h 248 0x3220 x
+conv2d_dw_bf16_params.h 250 0x3220 1
+conv2d_dw_bf16_params.h 249 0x3226 x
+conv2d_dw_bf16_params.h 255 0x3226 1 x
+conv2d_dw_bf16_params.h 258 0x3226 2
+conv2d_dw_bf16_params.h 258 0x3226 3
+conv2d_dw_bf16_params.h 252 0x3230 x
+conv2d_dw_bf16_params.h 253 0x3230 1 x
+conv2d_dw_bf16_params.h 253 0x3236
+conv2d_dw_bf16_params.h 255 0x3236 1 x
+conv2d_dw_bf16_params.h 254 0x323c x
+conv2d_dw_bf16_params.h 255 0x323c 1
+conv2d_dw_bf16_params.h 256 0x323c 2
+conv2d_dw_bf16_params.h 258 0x323c 3 x
+conv2d_dw_bf16_params.h 258 0x323c 4 x
+conv2d_dw_bf16_params.h 259 0x323c 5
+conv2d_dw_bf16_params.h 263 0x323c 6
+conv2d_dw_bf16_params.h 255 0x3248 x
+conv2d_dw_bf16_params.h 256 0x324c x
+conv2d_dw_bf16_params.h 260 0x324c 1 x
+conv2d_dw_bf16_params.h 258 0x3252 x
+conv2d_dw_bf16_params.h 260 0x3252 1
+conv2d_dw_bf16_params.h 259 0x3258 x
+conv2d_dw_bf16_params.h 264 0x3258 1 x
+conv2d_dw_bf16_params.h 260 0x325e x
+conv2d_dw_bf16_params.h 264 0x325e 1
+conv2d_dw_bf16_params.h 262 0x3264 x
+conv2d_dw_bf16_params.h 263 0x3268 x
+conv2d_dw_bf16_params.h 264 0x326c x
+conv2d_dw_bf16_params.h 266 0x3270 x
+conv2d_dw_bf16_params.h 266 0x3280
+conv2d_dw_bf16_params.h 266 0x3280 1
+conv2d_dw_bf16_params.h 266 0x3286
+conv2d_dw_bf16_params.h 266 0x328a
+conv2d_dw_bf16_params.h 266 0x3296
+conv2d_dw_bf16_params.h 266 0x32a0
+conv2d_dw_bf16_params.h 267 0x32a0 1
+conv2d_dw_bf16_params.h 266 0x32aa
+conv2d_dw_bf16_params.h 266 0x32aa 1
+conv2d_dw_bf16_params.h 266 0x32b0
+conv2d_dw_bf16_params.h 266 0x32b6
+conv2d_dw_bf16_params.h 267 0x32bc x
+conv2d_dw_bf16_params.h 266 0x32c6 x
+conv2d_dw_bf16_params.h 266 0x32ca
+conv2d_dw_bf16_params.h 267 0x32ca 1 x
+conv2d_dw_bf16_params.h 266 0x32d0 x
+conv2d_dw_bf16_params.h 266 0x32d8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 444 0x35c0 x
+superkernels.cpp 449 0x35c0 1
+superkernels.cpp 449 0x35c6 x
+superkernels.cpp 444 0x35cc
+superkernels.cpp 467 0x35da
+superkernels.cpp 452 0x35ea
+superkernels.cpp 449 0x35f2
+superkernels.cpp 449 0x35f2 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x35f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 446 0x35fc x
+superkernels.cpp 446 0x3600
+superkernels.cpp 446 0x3604
+superkernels.cpp 446 0x360a
+superkernels.cpp 461 0x360e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x360e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3618
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x3618 1
+tile.hpp 86 0x3618 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3626 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x3630
+tile.hpp 74 0x3634
+tile.hpp 74 0x3638 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 452 0x3640
+superkernels.cpp 461 0x3640 1
+superkernels.cpp 452 0x3648 x
+superkernels.cpp 453 0x364c
+superkernels.cpp 453 0x364c 1 x
+superkernels.cpp 452 0x365e
+superkernels.cpp 457 0x365e 1
+superkernels.cpp 452 0x3668 x
+superkernels.cpp 453 0x366c x
+superkernels.cpp 457 0x3670
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3680 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x3684
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3684 1
+io_buffer_main.h 218 0x368e
+io_buffer_main.h 218 0x3692
+io_buffer_main.h 235 0x3696 x
+io_buffer_main.h 218 0x36a4 x
+io_buffer_main.h 218 0x36a4 1 x
+io_buffer_main.h 218 0x36a8
+io_buffer_main.h 395 0x36ac
+io_buffer_main.h 395 0x36b6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 456 0x36ba
+superkernels.cpp 459 0x36ba 1
+superkernels.cpp 464 0x36ba 2
+superkernels.cpp 465 0x36ba 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x36ba 4
+io_buffer_main.h 425 0x36ba 5
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 52 0x36c4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x36ce
+io_buffer_main.h 324 0x36ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x36d4 x
+superkernels.cpp 457 0x36d8
+superkernels.cpp 461 0x36d8 1
+superkernels.cpp 456 0x36e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x36ec x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x36f2 x
+superkernels.cpp 456 0x36f6 x
+superkernels.cpp 459 0x36fa x
+superkernels.cpp 461 0x36fe x
+superkernels.cpp 456 0x3704 x
+superkernels.cpp 459 0x3708 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 201 0x370c x
+io_buffer_impl.h 52 0x3710 x
+io_buffer_impl.h 52 0x3714
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3720
+io_buffer_main.h 324 0x3724 x
+io_buffer_main.h 425 0x3734 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3738
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3738 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3742 x
+superkernels.cpp 464 0x3746
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3756 x
+io_buffer_main.h 327 0x375a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x375e x
+superkernels.cpp 464 0x3762
+superkernels.cpp 465 0x3768
+superkernels.cpp 465 0x3774 x
+superkernels.cpp 467 0x3780
+superkernels.cpp 467 0x378a x
+superkernels.cpp 467 0x378e
+superkernels.cpp 578 0x37a0
+superkernels.cpp 578 0x37a0 1 x
+superkernels.cpp 583 0x37a6
+superkernels.cpp 583 0x37b0 x
+superkernels.cpp 587 0x37c2
+superkernels.cpp 590 0x37c2 1
+superkernels.cpp 599 0x37c2 2
+superkernels.cpp 629 0x37c2 3
+superkernels.cpp 583 0x37d0
+superkernels.cpp 583 0x37d0 1
+superkernels.cpp 580 0x37da x
+superkernels.cpp 580 0x37de
+superkernels.cpp 580 0x37e2
+superkernels.cpp 580 0x37e8
+superkernels.cpp 587 0x37ec
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x37ec 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x37f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x37f6 1
+tile.hpp 86 0x37f6 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 587 0x3802
+superkernels.cpp 587 0x3802 1
+superkernels.cpp 587 0x380c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x3816
+tile.hpp 74 0x381a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 587 0x3820
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x3820 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 590 0x3830
+superkernels.cpp 591 0x3830 1
+superkernels.cpp 590 0x3836 x
+superkernels.cpp 591 0x3846 x
+superkernels.cpp 591 0x384a
+superkernels.cpp 599 0x3850
+superkernels.cpp 599 0x3854 x
+superkernels.cpp 591 0x385e x
+superkernels.cpp 611 0x386a
+superkernels.cpp 594 0x3874 x
+superkernels.cpp 595 0x387e
+superkernels.cpp 594 0x3884
+superkernels.cpp 594 0x388a
+superkernels.cpp 595 0x38a0 x
+superkernels.cpp 621 0x38aa
+superkernels.cpp 621 0x38c0
+superkernels.cpp 599 0x38d0 x
+superkernels.cpp 600 0x38da
+superkernels.cpp 599 0x38e0
+superkernels.cpp 599 0x38e6
+superkernels.cpp 600 0x38f0 x
+superkernels.cpp 621 0x38fa
+superkernels.cpp 606 0x3904 x
+superkernels.cpp 611 0x3904 1
+superkernels.cpp 611 0x390e x
+superkernels.cpp 607 0x3912 x
+superkernels.cpp 607 0x3916
+superkernels.cpp 607 0x391c
+superkernels.cpp 606 0x3924
+superkernels.cpp 607 0x392a
+superkernels.cpp 606 0x392e x
+superkernels.cpp 611 0x392e 1
+superkernels.cpp 607 0x3938 x
+superkernels.cpp 611 0x393c x
+superkernels.cpp 608 0x3940 x
+superkernels.cpp 608 0x3944
+superkernels.cpp 611 0x3944 1 x
+superkernels.cpp 608 0x3950 x
+superkernels.cpp 614 0x3960
+superkernels.cpp 614 0x3966 x
+superkernels.cpp 616 0x3966 1
+superkernels.cpp 615 0x3970
+superkernels.cpp 616 0x3970 1 x
+superkernels.cpp 615 0x397a x
+superkernels.cpp 618 0x3986 x
+superkernels.cpp 618 0x3986 1 x
+superkernels.cpp 614 0x398c x
+superkernels.cpp 616 0x398c 1 x
+superkernels.cpp 615 0x3992 x
+superkernels.cpp 616 0x3996 x
+superkernels.cpp 615 0x399a x
+superkernels.cpp 614 0x399e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x39a2
+io_buffer_main.h 218 0x39b2 x
+io_buffer_main.h 218 0x39b6
+io_buffer_main.h 218 0x39ba
+io_buffer_main.h 218 0x39be
+io_buffer_main.h 235 0x39c4 x
+io_buffer_main.h 218 0x39d0 x
+io_buffer_main.h 218 0x39d0 1 x
+io_buffer_main.h 218 0x39d4
+io_buffer_main.h 395 0x39d4 1
+io_buffer_main.h 395 0x39e2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x39f6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x39f6 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x39fa
+superkernels.cpp 621 0x39fe x
+superkernels.cpp 621 0x3a04
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3a10
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 623 0x3a20
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a20 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 623 0x3a2a x
+superkernels.cpp 623 0x3a2a 1
+superkernels.cpp 623 0x3a34
+superkernels.cpp 623 0x3a44
+superkernels.cpp 623 0x3a48
+superkernels.cpp 629 0x3a58
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a58 1 x
+io_buffer_main.h 395 0x3a58 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a62
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a62 1
+io_buffer_main.h 218 0x3a6c
+io_buffer_main.h 218 0x3a70
+io_buffer_main.h 235 0x3a74 x
+io_buffer_main.h 218 0x3a82 x
+io_buffer_main.h 218 0x3a82 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a86
+superkernels.cpp 630 0x3a86 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a86 2
+io_buffer_main.h 395 0x3a94 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a9e x
+superkernels.cpp 629 0x3aa2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3aaa x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3ab2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3ab2 1
+io_buffer_main.h 324 0x3ab2 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 630 0x3abc x
+superkernels.cpp 630 0x3ac0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3ac6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3ad0 x
+superkernels.cpp 630 0x3ad8 x
+superkernels.cpp 633 0x3ae8 x
+superkernels.cpp 633 0x3aee
+superkernels.cpp 633 0x3afa
+superkernels.cpp 637 0x3b10 x
+superkernels.cpp 637 0x3b16
+superkernels.cpp 637 0x3b1c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3b30
+io_buffer_main.h 327 0x3b30 1
+io_buffer_main.h 324 0x3b34
+io_buffer_main.h 327 0x3b34 1
+io_buffer_main.h 327 0x3b34 2
+io_buffer_main.h 425 0x3b34 3
+io_buffer_main.h 425 0x3b34 4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 645 0x3b3a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3b3a 1 x
+io_buffer_main.h 425 0x3b50 x
+io_buffer_main.h 327 0x3b54 x
+io_buffer_main.h 324 0x3b58 x
+io_buffer_main.h 327 0x3b66 x
+io_buffer_main.h 327 0x3b6a
+io_buffer_main.h 425 0x3b76 x
+io_buffer_main.h 327 0x3b7a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 645 0x3b8c
+superkernels.cpp 649 0x3b8c 1
+superkernels.cpp 645 0x3b90 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3b90 1
+io_buffer_main.h 327 0x3b96 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 649 0x3ba0
+superkernels.cpp 648 0x3bb0
+superkernels.cpp 651 0x3bb0 1
+superkernels.cpp 648 0x3bba
+superkernels.cpp 648 0x3bba 1 x
+superkernels.cpp 649 0x3bba 2
+superkernels.cpp 648 0x3bc4
+superkernels.cpp 648 0x3bd4
+superkernels.cpp 648 0x3bd8
+superkernels.cpp 649 0x3bea x
+superkernels.cpp 651 0x3bf4 x
+superkernels.cpp 651 0x3bf8
+superkernels.cpp - 0x3bf9
+
+
+superkernels.cpp:
+File name Line number Starting address View Stmt
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 21 0x3c10 x
+0_0_reloadable5.cc 23 0x3c10 1
+0_0_reloadable5.cc 23 0x3c14 x
+0_0_reloadable5.cc 24 0x3c18 x
+0_0_reloadable5.cc 26 0x3c1c x
+0_0_reloadable5.cc 25 0x3c20 x
+0_0_reloadable5.cc 22 0x3c24 x
+0_0_reloadable5.cc 30 0x3c40 x
+0_0_reloadable5.cc 32 0x3c40 1
+0_0_reloadable5.cc 32 0x3c44 x
+0_0_reloadable5.cc 34 0x3c48 x
+0_0_reloadable5.cc 33 0x3c4c x
+0_0_reloadable5.cc 31 0x3c50 x
+0_0_reloadable5.cc 38 0x3c60 x
+0_0_reloadable5.cc 40 0x3c60 1
+0_0_reloadable5.cc 40 0x3c64 x
+0_0_reloadable5.cc 42 0x3c68 x
+0_0_reloadable5.cc 41 0x3c6c x
+0_0_reloadable5.cc 39 0x3c70 x
+0_0_reloadable5.cc 46 0x3c80 x
+0_0_reloadable5.cc 48 0x3c80 1
+0_0_reloadable5.cc 48 0x3c84 x
+0_0_reloadable5.cc 50 0x3c88 x
+0_0_reloadable5.cc 49 0x3c8c x
+0_0_reloadable5.cc 47 0x3c90 x
+0_0_reloadable5.cc 54 0x3ca0 x
+0_0_reloadable5.cc 56 0x3ca0 1
+0_0_reloadable5.cc 56 0x3ca4 x
+0_0_reloadable5.cc 57 0x3ca8 x
+0_0_reloadable5.cc 59 0x3cac x
+0_0_reloadable5.cc 58 0x3cb0 x
+0_0_reloadable5.cc 55 0x3cb4 x
+0_0_reloadable5.cc 63 0x3cd0 x
+0_0_reloadable5.cc 65 0x3cd0 1
+0_0_reloadable5.cc 65 0x3cd4 x
+0_0_reloadable5.cc 66 0x3cd8 x
+0_0_reloadable5.cc 67 0x3cdc x
+0_0_reloadable5.cc 69 0x3ce0 x
+0_0_reloadable5.cc 68 0x3ce4 x
+0_0_reloadable5.cc 64 0x3ce8 x
+0_0_reloadable5.cc 73 0x3d00 x
+0_0_reloadable5.cc 75 0x3d00 1
+0_0_reloadable5.cc 75 0x3d04 x
+0_0_reloadable5.cc 76 0x3d08 x
+0_0_reloadable5.cc 78 0x3d0c x
+0_0_reloadable5.cc 77 0x3d10 x
+0_0_reloadable5.cc 74 0x3d14 x
+0_0_reloadable5.cc 94 0x930 x
+0_0_reloadable5.cc 96 0x930 1 x
+0_0_reloadable5.cc 96 0x930 2
+0_0_reloadable5.cc 98 0x930 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x930 4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 94 0x936
+0_0_reloadable5.cc 96 0x944
+0_0_reloadable5.cc 98 0x944 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x944 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 96 0x94c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x952
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x958 x
+io_buffer_compiler.h 590 0x95c
+io_buffer_compiler.h 590 0x960
+io_buffer_compiler.h 590 0x964
+io_buffer_compiler.h 590 0x968
+io_buffer_compiler.h 195 0x978 x
+io_buffer_compiler.h 195 0x978 1 x
+io_buffer_compiler.h 194 0x97c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x980
+io_buffer_main.h 410 0x980 1
+io_buffer_main.h 410 0x98a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 98 0x98e
+0_0_reloadable5.cc 102 0x98e 1
+0_0_reloadable5.cc 98 0x992 x
+0_0_reloadable5.cc 98 0x996
+0_0_reloadable5.cc 98 0x99a
+0_0_reloadable5.cc 98 0x9a8
+0_0_reloadable5.cc 98 0x9ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x9b0 x
+io_buffer_compiler.h 590 0x9b8
+io_buffer_compiler.h 590 0x9bc
+io_buffer_compiler.h 590 0x9c0
+io_buffer_compiler.h 590 0x9c4
+io_buffer_compiler.h 195 0x9d4 x
+io_buffer_compiler.h 195 0x9d4 1 x
+io_buffer_compiler.h 194 0x9d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x9e4 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 102 0x9e8 x
+0_0_reloadable5.cc 102 0x9ec
+0_0_reloadable5.cc 102 0x9f0
+0_0_reloadable5.cc 102 0x9f6
+0_0_reloadable5.cc 102 0xa08
+0_0_reloadable5.cc 105 0xa0c
+0_0_reloadable5.cc 107 0xa0c 1
+0_0_reloadable5.cc 105 0xa20 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa20 1
+io_buffer_compiler.h 606 0xa20 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa20 3
+io_buffer_main.h 440 0xa20 4
+io_buffer_main.h 440 0xa26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 107 0xa2a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa2e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa2e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 605 0xa38 x
+io_buffer_compiler.h 605 0xa3c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa4a
+io_buffer_main.h 440 0xa4e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa52
+io_buffer_compiler.h 606 0xa52 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 107 0xa58 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa66 x
+io_buffer_compiler.h 605 0xa6a x
+io_buffer_compiler.h 606 0xa6a 1
+io_buffer_compiler.h 605 0xa70
+io_buffer_compiler.h 606 0xa70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa82 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 110 0xa86
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa8a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 110 0xa96 x
+0_0_reloadable5.cc 110 0xaa0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xaa4
+io_buffer_compiler.h 606 0xaa8 x
+io_buffer_compiler.h 606 0xaac
+io_buffer_compiler.h 606 0xab0
+io_buffer_compiler.h - 0xab1
+
+
+CU: me_div.c:
+File name Line number Starting address View Stmt
+
+./me_div.c:[++]
+me_div.c 108 0x3d30
+me_div.c 108 0x3d30 1
+me_div.c 115 0x3d30 2 x
+me_div.c 108 0x3d36
+me_div.c 108 0x3d3a
+me_div.c 108 0x3d3e
+me_div.c 108 0x3d42
+me_div.c 108 0x3d46
+me_div.c 108 0x3d4a
+me_div.c 108 0x3d4e
+me_div.c 108 0x3d52
+me_div.c 108 0x3d56
+me_div.c 108 0x3d5a
+me_div.c 108 0x3d5e
+me_div.c 108 0x3d62
+me_div.c 108 0x3d66
+me_div.c 108 0x3d6a
+me_div.c 108 0x3d6e
+me_div.c 108 0x3d72
+me_div.c 108 0x3d76
+me_div.c 108 0x3d7a
+me_div.c 108 0x3d7e
+me_div.c 108 0x3d82
+me_div.c 108 0x3d86
+me_div.c 108 0x3d8a
+me_div.c 108 0x3d8e
+me_div.c 108 0x3d92
+me_div.c 108 0x3d96
+me_div.c 108 0x3d9a
+me_div.c 108 0x3d9e
+me_div.c 108 0x3da2
+me_div.c 119 0x3da6 x
+me_div.c 108 0x3daa x
+me_div.c 108 0x3dae
+me_div.c 108 0x3db2
+me_div.c 108 0x3db6
+me_div.c - 0x3db7
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.bcf
@@ -0,0 +1,16 @@
+_reserved DMb 0x0 0x40000
+
+_reserved PM 0x0 0x930 //reserved for main elf
+
+_entry_point _Z13kernelWrapperPPvjjjj
+_symbol _Z13kernelWrapperPPvjjjj 0x930
+
+_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers
+_reserved DMb 0x7ba80 0x40 //reserved for sync buffer
+_stack DM_stack 0x7bac0 0x940 //stack for core
+_reserved DMb 0x7c400 0x40 //reserved for main elf heap
+//space for synopsys compiler at 0x7c440 0x880//heap
+_reserved DMb 0x40000 0x3b280
+
+_reserved DMb 0x7ccc0 0x3340
+_reserved DMb 0x80000 0x80000 // And everything else the core can't see
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.prx
new file mode 100644
index 0000000000000000000000000000000000000000..89de75c361895eb4b9c646c851f2f2af042ef522
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/scripts/3_3_reloadable11.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/src/3_3_reloadable11.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/src/3_3_reloadable11.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ed266062f542d5fd9e7d7b554216254c298b9574
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable11/src/3_3_reloadable11.cc
@@ -0,0 +1,110 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_conv_eltbinary(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict,const unsigned int (&)[17],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+
+// Declare Kernel objects and external arrays
+
+
+void _b896_wrapper(void* args[])
+{
+ conv2d_maxpool(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[2]));
+}
+
+void _b901_wrapper(void* args[])
+{
+ superkernel_add1d_attribute_broadcasting(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b906_wrapper(void* args[])
+{
+ superkernel_clip1d(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b881_wrapper(void* args[])
+{
+ superkernel_mul1d_attribute_broadcasting(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b891_wrapper(void* args[])
+{
+ superkernel_mul1d(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast*>(args[2]));
+}
+
+void _b924_wrapper(void* args[])
+{
+ superkernel_conv_eltbinary(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast*>(args[2]),
+ *reinterpret_cast(args[4]),
+ *reinterpret_cast*>(args[3]));
+}
+
+void _b919_wrapper(void* args[])
+{
+ superkernel_conv2d_dwc(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[2]));
+}
+
+using UniformKernelFunc = void (*)(void **);
+
+static UniformKernelFunc g_uniformKernelFuncs[7] = {
+ _b896_wrapper,
+ _b901_wrapper,
+ _b906_wrapper,
+ _b881_wrapper,
+ _b891_wrapper,
+ _b924_wrapper,
+ _b919_wrapper
+};
+
+__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut)
+{
+ uint32 idx = 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 1);
+ idx += (numSyncIn > 1) ? 1 : 0;
+ idx += numAsyncIn;
+
+ (*(g_uniformKernelFuncs[kernelId]))(args);
+
+ idx = 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 1);
+ idx += (numSyncIn > 1) ? 1 : 0;
+ idx += numAsyncIn;
+}
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.calltree
new file mode 100644
index 0000000000000000000000000000000000000000..0d87486df8d685214c85a56d2c420e80fd5d49bc
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.calltree
@@ -0,0 +1,54 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:39 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+
+// Release: ipp V-2024.06-TGT-241219
+
+_Z13kernelWrapperPPvjjjj
+ _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _ZN12me_primitive10udiv_dstepEjjRjS0_ (*)
+ int32_to_float32
+ _ZL28normalizeRoundAndPackFloat32iij
+ _ZL19roundAndPackFloat32iij
+ float32_add
+ _ZL14addFloat32Sigsjji
+ _ZL19propagateFloat32NaNjj
+ _ZL19roundAndPackFloat32iij (*)
+ _ZL14subFloat32Sigsjji
+ _ZL19propagateFloat32NaNjj (*)
+ _ZL28normalizeRoundAndPackFloat32iij (*)
+
+
+Call tree stack and functions sizes:
+
+stack stack stack call func func function name
+ desc level level desc
+----- ----- ----- ----- ----- ----- --------------------------------------------------------------
+ 64 448 0 0 220 10058 _Z13kernelWrapperPPvjjjj
+ 128 384 1 1 2676 9838 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 2 1588 1588 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 0 0 2 2 670 670 _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ 256 256 2 2 2680 2822 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 0 0 3 3 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 2 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 2 114 530 int32_to_float32
+ 0 0 2 3 24 416 _ZL28normalizeRoundAndPackFloat32iij
+ 0 0 2 4 392 392 _ZL19roundAndPackFloat32iij
+ 0 0 2 2 64 1968 float32_add
+ 0 0 3 3 624 1128 _ZL14addFloat32Sigsjji
+ 0 0 4 4 112 112 _ZL19propagateFloat32NaNjj
+ 0 0 3 4 392 392 _ZL19roundAndPackFloat32iij
+ 0 0 2 3 752 1280 _ZL14subFloat32Sigsjji
+ 0 0 3 4 112 112 _ZL19propagateFloat32NaNjj
+ 0 0 2 4 24 416 _ZL28normalizeRoundAndPackFloat32iij (*)
+
+
+Maximum call level : 4
+Maximum stack level: 4
+Maximum stack size : 448
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmic2
new file mode 100644
index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmic2
@@ -0,0 +1,14427 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable2.cc" 29 first
+.src_ref 0 "0_0_reloadable2.cc" 31 60 first
+.function_start
+ 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00101111" // /* MW 4 */
+ 2355 "11010000" // /* MW 3 */
+ 2356 "11000010" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 29
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 31 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "01010000" // /* MW 6 */
+ 2367 "11101000" // /* MW 5 */
+ 2368 "00000001" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "01110011" // /* MW 2 */
+ 2371 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79
+.src_ref 0 "0_0_reloadable2.cc" 31 110 first
+ 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2373 "01111001" // /* MW 9 */
+ 2374 "01100000" // /* MW 8 */
+ 2375 "10110000" // /* MW 7 */
+ 2376 "10000011" // /* MW 6 */
+ 2377 "10100111" // /* MW 5 */
+ 2378 "00011111" // /* MW 4 */
+ 2379 "10110000" // /* MW 3 */
+ 2380 "10000010" // /* MW 2 */
+ 2381 "11111111" // /* MW 1 */
+ 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2383 "00111101" // /* MW 3 */
+ 2384 "11110100" // /* MW 2 */
+ 2385 "00001111" // /* MW 1 */
+ 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2387 "00000000" // /* MW 1 */
+ 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2389 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2391 "00000010" // /* MW 3 */
+ 2392 "01101000" // /* MW 2 */
+ 2393 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2395 "00010110" // /* MW 3 */
+ 2396 "00011110" // /* MW 2 */
+ 2397 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2399 "01010110" // /* MW 3 */
+ 2400 "00111110" // /* MW 2 */
+ 2401 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2403 "00110110" // /* MW 3 */
+ 2404 "11101110" // /* MW 2 */
+ 2405 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2407 "01110110" // /* MW 3 */
+ 2408 "00000111" // /* MW 2 */
+ 2409 "00000000" // /* MW 1 */
+ 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2411 "00000000" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2423 "00100010" // /* MW 3 */
+ 2424 "00100001" // /* MW 2 */
+ 2425 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2427 "00010001" // /* MW 3 */
+ 2428 "11010110" // /* MW 2 */
+ 2429 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2431 "11111101" // /* MW 3 */
+ 2432 "11100000" // /* MW 2 */
+ 2433 "00010111" // /* MW 1 */
+ 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2435 "00000000" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2441 "00001000" // /* MW 3 */
+ 2442 "01010111" // /* MW 2 */
+ 2443 "00010100" // /* MW 1 */
+ 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2445 "00000000" // /* MW 1 */
+ 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2447 "00000000" // /* MW 1 */
+ 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2449 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79 first
+ 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00011110" // /* MW 3 */
+ 2452 "00101100" // /* MW 2 */
+ 2453 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 18 47 first
+ 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "10011110" // /* MW 3 */
+ 2456 "11111100" // /* MW 2 */
+ 2457 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 19 81 first
+ 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2459 "00011110" // /* MW 3 */
+ 2460 "00000101" // /* MW 2 */
+ 2461 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 16 4 first
+.no_stack_arguments
+ 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */
+ 2463 "00000001" // /* MW 5 */
+ 2464 "00000000" // /* MW 4 */
+ 2465 "10111000" // /* MW 3 */
+ 2466 "00001110" // /* MW 2 */
+ 2467 "00000000" // /* MW 1 */
+.delay_slot
+ 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2469 "01010101" // /* MW 3 */
+ 2470 "11110011" // /* MW 2 */
+ 2471 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2479 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 38 60 first
+.return_address
+ 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2481 "00010110" // /* MW 3 */
+ 2482 "11110110" // /* MW 2 */
+ 2483 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2485 "01010001" // /* MW 3 */
+ 2486 "11110011" // /* MW 2 */
+ 2487 "00000111" // /* MW 1 */
+ 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2489 "00000000" // /* MW 1 */
+ 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2491 "00000000" // /* MW 1 */
+ 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2493 "00000000" // /* MW 1 */
+ 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2495 "00000000" // /* MW 1 */
+ 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2497 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2499 "00001000" // /* MW 3 */
+ 2500 "01101000" // /* MW 2 */
+ 2501 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2503 "00010110" // /* MW 3 */
+ 2504 "00000110" // /* MW 2 */
+ 2505 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2507 "00000101" // /* MW 3 */
+ 2508 "00100010" // /* MW 2 */
+ 2509 "00010000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+ 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2517 "00000000" // /* MW 1 */
+ 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2519 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "00011000" // /* MW 3 */
+ 2522 "00010101" // /* MW 2 */
+ 2523 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2525 "01000001" // /* MW 5 */
+ 2526 "10101111" // /* MW 4 */
+ 2527 "00101101" // /* MW 3 */
+ 2528 "10000111" // /* MW 2 */
+ 2529 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2531 "00010110" // /* MW 3 */
+ 2532 "11110110" // /* MW 2 */
+ 2533 "00000000" // /* MW 1 */
+ 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2535 "10011001" // /* MW 3 */
+ 2536 "11111011" // /* MW 2 */
+ 2537 "00000111" // /* MW 1 */
+ 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2539 "00000000" // /* MW 1 */
+ 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "11110001" // /* MW 3 */
+ 2542 "11111101" // /* MW 2 */
+ 2543 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41 first
+ 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000001" // /* MW 5 */
+ 2546 "00000000" // /* MW 4 */
+ 2547 "00000000" // /* MW 3 */
+ 2548 "11111000" // /* MW 2 */
+ 2549 "11111111" // /* MW 1 */
+ 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2551 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+ 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2553 "00000000" // /* MW 3 */
+ 2554 "00101000" // /* MW 2 */
+ 2555 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2557 "00000001" // /* MW 3 */
+ 2558 "01100011" // /* MW 2 */
+ 2559 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2561 "00010010" // /* MW 3 */
+ 2562 "00100001" // /* MW 2 */
+ 2563 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2565 "00010001" // /* MW 3 */
+ 2566 "11110110" // /* MW 2 */
+ 2567 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2571 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 218 first
+.src_ref 2 "reduce_base_c8.h" 220 27 first
+.src_ref 2 "reduce_base_c8.h" 290 63
+.src_ref 2 "reduce_base_c8.h" 348 46
+.function_start
+ 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2577 "01111000" // /* MW 11 */
+ 2578 "01100000" // /* MW 10 */
+ 2579 "00001001" // /* MW 9 */
+ 2580 "01101000" // /* MW 8 */
+ 2581 "01100111" // /* MW 7 */
+ 2582 "00111110" // /* MW 6 */
+ 2583 "10001011" // /* MW 5 */
+ 2584 "10000000" // /* MW 4 */
+ 2585 "11010011" // /* MW 3 */
+ 2586 "10001110" // /* MW 2 */
+ 2587 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 348 46 first
+ 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2589 "00001000" // /* MW 9 */
+ 2590 "00000111" // /* MW 8 */
+ 2591 "00110000" // /* MW 7 */
+ 2592 "00001001" // /* MW 6 */
+ 2593 "00100101" // /* MW 5 */
+ 2594 "00111110" // /* MW 4 */
+ 2595 "00000000" // /* MW 3 */
+ 2596 "00000111" // /* MW 2 */
+ 2597 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 293 77
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 299 40
+.src_ref 2 "reduce_base_c8.h" 300 59
+.src_ref 2 "reduce_base_c8.h" 326 79
+ 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2599 "01111000" // /* MW 9 */
+ 2600 "01100000" // /* MW 8 */
+ 2601 "00001000" // /* MW 7 */
+ 2602 "10101000" // /* MW 6 */
+ 2603 "00010111" // /* MW 5 */
+ 2604 "00111110" // /* MW 4 */
+ 2605 "00000000" // /* MW 3 */
+ 2606 "01111110" // /* MW 2 */
+ 2607 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57
+.src_ref 2 "reduce_base_c8.h" 301 81
+.src_ref 2 "reduce_base_c8.h" 305 77
+ 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2609 "00010000" // /* MW 9 */
+ 2610 "11111100" // /* MW 8 */
+ 2611 "10001111" // /* MW 7 */
+ 2612 "00111100" // /* MW 6 */
+ 2613 "00000000" // /* MW 5 */
+ 2614 "00000000" // /* MW 4 */
+ 2615 "00000000" // /* MW 3 */
+ 2616 "11100101" // /* MW 2 */
+ 2617 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 218
+.src_ref 2 "reduce_base_c8.h" 280 76
+.src_ref 2 "reduce_base_c8.h" 312 98
+ 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2619 "01110000" // /* MW 9 */
+ 2620 "00000000" // /* MW 8 */
+ 2621 "00000000" // /* MW 7 */
+ 2622 "00000000" // /* MW 6 */
+ 2623 "00000010" // /* MW 5 */
+ 2624 "00000000" // /* MW 4 */
+ 2625 "00000000" // /* MW 3 */
+ 2626 "10010000" // /* MW 2 */
+ 2627 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+ 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2629 "00010111" // /* MW 3 */
+ 2630 "01100000" // /* MW 2 */
+ 2631 "00011100" // /* MW 1 */
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 220 25 first
+ 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "01110001" // /* MW 3 */
+ 2636 "00011100" // /* MW 2 */
+ 2637 "00001000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 28 first
+ 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "01010110" // /* MW 3 */
+ 2640 "00011111" // /* MW 2 */
+ 2641 "00000001" // /* MW 1 */
+ 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2643 "00000000" // /* MW 1 */
+ 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2645 "00000000" // /* MW 1 */
+ 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2647 "00000000" // /* MW 1 */
+ 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2649 "00000000" // /* MW 1 */
+ 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2651 "00000000" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 26
+.src_ref 2 "reduce_base_c8.h" 301 81 first
+ 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2655 "10001001" // /* MW 5 */
+ 2656 "01000100" // /* MW 4 */
+ 2657 "00111101" // /* MW 3 */
+ 2658 "11101010" // /* MW 2 */
+ 2659 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 26 first
+.src_ref 2 "reduce_base_c8.h" 293 58 first
+.src_ref 2 "reduce_base_c8.h" 301 81
+ 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2661 "10101000" // /* MW 9 */
+ 2662 "01001000" // /* MW 8 */
+ 2663 "11001100" // /* MW 7 */
+ 2664 "01111110" // /* MW 6 */
+ 2665 "01001101" // /* MW 5 */
+ 2666 "00000110" // /* MW 4 */
+ 2667 "11010000" // /* MW 3 */
+ 2668 "11110110" // /* MW 2 */
+ 2669 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 63 first
+ 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2671 "01101101" // /* MW 3 */
+ 2672 "10100100" // /* MW 2 */
+ 2673 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 77 first
+ 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2675 "00011101" // /* MW 3 */
+ 2676 "00001100" // /* MW 2 */
+ 2677 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 41 first
+.src_ref 2 "reduce_base_c8.h" 300 59 first
+ 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2679 "11111111" // /* MW 5 */
+ 2680 "10110010" // /* MW 4 */
+ 2681 "10110000" // /* MW 3 */
+ 2682 "01000011" // /* MW 2 */
+ 2683 "11010100" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 24 first
+.src_ref 2 "reduce_base_c8.h" 287 40 first
+ 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2691 "01001100" // /* MW 5 */
+ 2692 "10011100" // /* MW 4 */
+ 2693 "00111110" // /* MW 3 */
+ 2694 "11110110" // /* MW 2 */
+ 2695 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 29 first
+.src_ref 2 "reduce_base_c8.h" 312 60 first
+ 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2697 "10101111" // /* MW 9 */
+ 2698 "01001001" // /* MW 8 */
+ 2699 "00000111" // /* MW 7 */
+ 2700 "10000000" // /* MW 6 */
+ 2701 "10110101" // /* MW 5 */
+ 2702 "11111111" // /* MW 4 */
+ 2703 "11010111" // /* MW 3 */
+ 2704 "10001010" // /* MW 2 */
+ 2705 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 57 first
+ 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11011111" // /* MW 3 */
+ 2708 "11101001" // /* MW 2 */
+ 2709 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 78 first
+ 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2711 "01001111" // /* MW 3 */
+ 2712 "11111000" // /* MW 2 */
+ 2713 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 40 first
+ 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2715 "11101101" // /* MW 3 */
+ 2716 "01101011" // /* MW 2 */
+ 2717 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57 first
+.src_ref 2 "reduce_base_c8.h" 299 40
+ 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2719 "11010000" // /* MW 5 */
+ 2720 "10110101" // /* MW 4 */
+ 2721 "10111101" // /* MW 3 */
+ 2722 "10001011" // /* MW 2 */
+ 2723 "11101100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 41
+ 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "11111111" // /* MW 3 */
+ 2726 "10101111" // /* MW 2 */
+ 2727 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 85 first
+ 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "01101111" // /* MW 3 */
+ 2730 "01111011" // /* MW 2 */
+ 2731 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 27 first
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2733 "01010101" // /* MW 5 */
+ 2734 "01100000" // /* MW 4 */
+ 2735 "00111111" // /* MW 3 */
+ 2736 "10001010" // /* MW 2 */
+ 2737 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 33 first
+ 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2739 "00101110" // /* MW 3 */
+ 2740 "00011100" // /* MW 2 */
+ 2741 "00000001" // /* MW 1 */
+ 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2743 "00000000" // /* MW 1 */
+ 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2745 "00000000" // /* MW 1 */
+ 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2747 "00000000" // /* MW 1 */
+ 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2749 "00000000" // /* MW 1 */
+ 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2751 "00000000" // /* MW 1 */
+ 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2753 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 31
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2755 "01110000" // /* MW 7 */
+ 2756 "00001110" // /* MW 6 */
+ 2757 "11110000" // /* MW 5 */
+ 2758 "00000011" // /* MW 4 */
+ 2759 "00110000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 34 first
+ 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2763 "00001110" // /* MW 3 */
+ 2764 "00000100" // /* MW 2 */
+ 2765 "00000001" // /* MW 1 */
+ 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2767 "00000000" // /* MW 1 */
+ 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2769 "00000000" // /* MW 1 */
+ 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2771 "00000000" // /* MW 1 */
+ 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2773 "00000000" // /* MW 1 */
+ 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2775 "00000000" // /* MW 1 */
+ 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2777 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 32
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2779 "01110000" // /* MW 7 */
+ 2780 "10001110" // /* MW 6 */
+ 2781 "00110000" // /* MW 5 */
+ 2782 "00000011" // /* MW 4 */
+ 2783 "00110000" // /* MW 3 */
+ 2784 "10000001" // /* MW 2 */
+ 2785 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 32 first
+ 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2787 "11010110" // /* MW 3 */
+ 2788 "00010111" // /* MW 2 */
+ 2789 "00000001" // /* MW 1 */
+ 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2791 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */
+ 2793 "00000001" // /* MW 5 */
+ 2794 "01000000" // /* MW 4 */
+ 2795 "10110000" // /* MW 3 */
+ 2796 "00000101" // /* MW 2 */
+ 2797 "11000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 98 first
+.delay_slot
+ 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2799 "00001101" // /* MW 3 */
+ 2800 "00100111" // /* MW 2 */
+ 2801 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 318 64 first
+.delay_slot
+ 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2803 "10011111" // /* MW 3 */
+ 2804 "11110011" // /* MW 2 */
+ 2805 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 41 first
+.src_ref 2 "reduce_base_c8.h" 305 77 first
+.delay_slot
+ 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "11111111" // /* MW 5 */
+ 2808 "10110001" // /* MW 4 */
+ 2809 "10110010" // /* MW 3 */
+ 2810 "00001011" // /* MW 2 */
+ 2811 "10100101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 76 first
+.delay_slot
+ 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2813 "00001101" // /* MW 3 */
+ 2814 "00100001" // /* MW 2 */
+ 2815 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 30 first
+.src_ref 2 "reduce_base_c8.h" 318 88 first
+.delay_slot
+ 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2817 "11011111" // /* MW 5 */
+ 2818 "11111111" // /* MW 4 */
+ 2819 "00111100" // /* MW 3 */
+ 2820 "11111010" // /* MW 2 */
+ 2821 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00000101" // /* MW 3 */
+ 2824 "00111000" // /* MW 2 */
+ 2825 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "11000111" // /* MW 3 */
+ 2828 "10111001" // /* MW 2 */
+ 2829 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */
+ 2831 "00000001" // /* MW 5 */
+ 2832 "01000000" // /* MW 4 */
+ 2833 "11100000" // /* MW 3 */
+ 2834 "00000111" // /* MW 2 */
+ 2835 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2843 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 41 first
+.delay_slot
+ 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "11111111" // /* MW 3 */
+ 2846 "11101101" // /* MW 2 */
+ 2847 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00001001" // /* MW 3 */
+ 2850 "00100010" // /* MW 2 */
+ 2851 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00100111" // /* MW 3 */
+ 2854 "01100010" // /* MW 2 */
+ 2855 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */
+ 2857 "00000001" // /* MW 5 */
+ 2858 "01000000" // /* MW 4 */
+ 2859 "10100000" // /* MW 3 */
+ 2860 "00000111" // /* MW 2 */
+ 2861 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2869 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.delay_slot
+ 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001101" // /* MW 3 */
+ 2872 "00001110" // /* MW 2 */
+ 2873 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00100111" // /* MW 3 */
+ 2876 "11000100" // /* MW 2 */
+ 2877 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */
+ 2879 "00000001" // /* MW 5 */
+ 2880 "01000000" // /* MW 4 */
+ 2881 "01010000" // /* MW 3 */
+ 2882 "00000111" // /* MW 2 */
+ 2883 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2893 "00000000" // /* MW 1 */
+ 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */
+ 2895 "00000000" // /* MW 5 */
+ 2896 "00000000" // /* MW 4 */
+ 2897 "11110000" // /* MW 3 */
+ 2898 "00000110" // /* MW 2 */
+ 2899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "00010001" // /* MW 3 */
+ 2902 "00110100" // /* MW 2 */
+ 2903 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2907 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2909 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2911 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "00010101" // /* MW 3 */
+ 2914 "00111010" // /* MW 2 */
+ 2915 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "00101010" // /* MW 3 */
+ 2918 "01110000" // /* MW 2 */
+ 2919 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */
+ 2921 "00000001" // /* MW 5 */
+ 2922 "01000000" // /* MW 4 */
+ 2923 "01010000" // /* MW 3 */
+ 2924 "00000110" // /* MW 2 */
+ 2925 "11000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2933 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 316 38
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2935 "00010001" // /* MW 3 */
+ 2936 "00110100" // /* MW 2 */
+ 2937 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2939 "00100111" // /* MW 3 */
+ 2940 "10100010" // /* MW 2 */
+ 2941 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */
+ 2943 "00000001" // /* MW 5 */
+ 2944 "01000000" // /* MW 4 */
+ 2945 "00010000" // /* MW 3 */
+ 2946 "00000110" // /* MW 2 */
+ 2947 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2953 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2957 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00101000" // /* MW 3 */
+ 2960 "01000100" // /* MW 2 */
+ 2961 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 2963 "00000001" // /* MW 5 */
+ 2964 "01000000" // /* MW 4 */
+ 2965 "11110000" // /* MW 3 */
+ 2966 "00000110" // /* MW 2 */
+ 2967 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2969 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2971 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2977 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 286 44 first
+.src_ref 2 "reduce_base_c8.h" 289 38
+.src_ref 2 "reduce_base_c8.h" 291 40
+.src_ref 2 "reduce_base_c8.h" 291 40
+ 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2979 "01011000" // /* MW 9 */
+ 2980 "11101100" // /* MW 8 */
+ 2981 "00000111" // /* MW 7 */
+ 2982 "00001000" // /* MW 6 */
+ 2983 "00100010" // /* MW 5 */
+ 2984 "00000000" // /* MW 4 */
+ 2985 "11100000" // /* MW 3 */
+ 2986 "11010110" // /* MW 2 */
+ 2987 "10000011" // /* MW 1 */
+ 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "10100000" // /* MW 3 */
+ 2990 "10011100" // /* MW 2 */
+ 2991 "00011111" // /* MW 1 */
+ 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2993 "00000000" // /* MW 1 */
+ 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2995 "00000000" // /* MW 1 */
+ 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2997 "00000000" // /* MW 1 */
+ 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2999 "00000000" // /* MW 1 */
+ 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 38 first
+ 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3003 "11110111" // /* MW 3 */
+ 3004 "00011100" // /* MW 2 */
+ 3005 "00000100" // /* MW 1 */
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+ 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3017 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 288 39 first
+ 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3019 "11110111" // /* MW 3 */
+ 3020 "00011110" // /* MW 2 */
+ 3021 "00000100" // /* MW 1 */
+ 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3023 "00000000" // /* MW 1 */
+ 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3025 "00000000" // /* MW 1 */
+ 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3027 "00000000" // /* MW 1 */
+ 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3029 "00000000" // /* MW 1 */
+ 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3031 "00000000" // /* MW 1 */
+ 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3033 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 289 38 first
+ 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3035 "01010111" // /* MW 3 */
+ 3036 "00011100" // /* MW 2 */
+ 3037 "00000100" // /* MW 1 */
+ 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3039 "00000000" // /* MW 1 */
+ 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3041 "00000000" // /* MW 1 */
+ 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3043 "00000000" // /* MW 1 */
+ 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3045 "00000000" // /* MW 1 */
+ 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3047 "00000000" // /* MW 1 */
+ 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3049 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 39 first
+ 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3051 "00110111" // /* MW 3 */
+ 3052 "00011100" // /* MW 2 */
+ 3053 "00000100" // /* MW 1 */
+ 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3055 "00000000" // /* MW 1 */
+ 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3057 "00000000" // /* MW 1 */
+ 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3059 "00000000" // /* MW 1 */
+ 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3061 "00000000" // /* MW 1 */
+ 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3063 "00000000" // /* MW 1 */
+ 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3065 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 291 40 first
+ 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3067 "01010111" // /* MW 3 */
+ 3068 "00001000" // /* MW 2 */
+ 3069 "00000100" // /* MW 1 */
+ 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3071 "00000000" // /* MW 1 */
+ 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3073 "00000000" // /* MW 1 */
+ 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3075 "00000000" // /* MW 5 */
+ 3076 "00000000" // /* MW 4 */
+ 3077 "11101000" // /* MW 3 */
+ 3078 "00000110" // /* MW 2 */
+ 3079 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3081 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3083 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3085 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 292 38 first
+.delay_slot
+ 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3087 "01010001" // /* MW 3 */
+ 3088 "00000110" // /* MW 2 */
+ 3089 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 38 first
+.delay_slot
+ 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3091 "00011100" // /* MW 13 */
+ 3092 "00000000" // /* MW 12 */
+ 3093 "00000000" // /* MW 11 */
+ 3094 "01010111" // /* MW 10 */
+ 3095 "00011010" // /* MW 9 */
+ 3096 "01000000" // /* MW 8 */
+ 3097 "00000000" // /* MW 7 */
+ 3098 "00000000" // /* MW 6 */
+ 3099 "10100011" // /* MW 5 */
+ 3100 "00101001" // /* MW 4 */
+ 3101 "11111000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+.src_ref 2 "reduce_base_c8.h" 274 44 first
+.src_ref 2 "reduce_base_c8.h" 275 40
+.src_ref 2 "reduce_base_c8.h" 275 40
+ 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00010000" // /* MW 8 */
+ 3107 "01001000" // /* MW 7 */
+ 3108 "10101000" // /* MW 6 */
+ 3109 "01100111" // /* MW 5 */
+ 3110 "00111110" // /* MW 4 */
+ 3111 "11100000" // /* MW 3 */
+ 3112 "10010010" // /* MW 2 */
+ 3113 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 40 first
+.src_ref 2 "reduce_base_c8.h" 279 40
+ 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01111000" // /* MW 9 */
+ 3116 "00001110" // /* MW 8 */
+ 3117 "11010000" // /* MW 7 */
+ 3118 "00110011" // /* MW 6 */
+ 3119 "00100010" // /* MW 5 */
+ 3120 "00001100" // /* MW 4 */
+ 3121 "10000000" // /* MW 3 */
+ 3122 "10000000" // /* MW 2 */
+ 3123 "11111101" // /* MW 1 */
+ 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3125 "00000000" // /* MW 1 */
+ 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3127 "00000000" // /* MW 1 */
+ 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3129 "00000000" // /* MW 1 */
+ 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3131 "00000000" // /* MW 1 */
+ 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38
+ 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3135 "01010111" // /* MW 3 */
+ 3136 "00011100" // /* MW 2 */
+ 3137 "00000100" // /* MW 1 */
+ 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3139 "00000000" // /* MW 1 */
+ 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3141 "00000000" // /* MW 1 */
+ 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3143 "00000000" // /* MW 1 */
+ 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3145 "00000000" // /* MW 1 */
+ 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3147 "00000000" // /* MW 1 */
+ 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 39 first
+ 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3151 "11110111" // /* MW 3 */
+ 3152 "00011110" // /* MW 2 */
+ 3153 "00000100" // /* MW 1 */
+ 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3155 "00000000" // /* MW 1 */
+ 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3157 "00000000" // /* MW 1 */
+ 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3159 "00000000" // /* MW 1 */
+ 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3161 "00000000" // /* MW 1 */
+ 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3163 "00000000" // /* MW 1 */
+ 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3165 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38 first
+.src_ref 2 "reduce_base_c8.h" 277 38 first
+ 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3167 "01010111" // /* MW 3 */
+ 3168 "00011100" // /* MW 2 */
+ 3169 "00000100" // /* MW 1 */
+ 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3171 "00000000" // /* MW 1 */
+ 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3173 "00000000" // /* MW 1 */
+ 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3175 "00000000" // /* MW 1 */
+ 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3177 "00000000" // /* MW 1 */
+ 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3179 "00000000" // /* MW 1 */
+ 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3181 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 278 39 first
+ 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3183 "10110111" // /* MW 3 */
+ 3184 "00011100" // /* MW 2 */
+ 3185 "00000100" // /* MW 1 */
+ 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3187 "00000000" // /* MW 1 */
+ 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3189 "00000000" // /* MW 1 */
+ 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3191 "00000000" // /* MW 1 */
+ 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3193 "00000000" // /* MW 1 */
+ 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3195 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3197 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3199 "00110111" // /* MW 3 */
+ 3200 "00001000" // /* MW 2 */
+ 3201 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3205 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3207 "00000000" // /* MW 5 */
+ 3208 "00000000" // /* MW 4 */
+ 3209 "11101000" // /* MW 3 */
+ 3210 "00000110" // /* MW 2 */
+ 3211 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3213 "01000001" // /* MW 3 */
+ 3214 "00000010" // /* MW 2 */
+ 3215 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3219 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 281 38 first
+.delay_slot
+ 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3221 "01110001" // /* MW 3 */
+ 3222 "00010100" // /* MW 2 */
+ 3223 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 38 first
+.delay_slot
+ 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3225 "01110000" // /* MW 7 */
+ 3226 "10100101" // /* MW 6 */
+ 3227 "00000001" // /* MW 5 */
+ 3228 "00000000" // /* MW 4 */
+ 3229 "00110000" // /* MW 3 */
+ 3230 "11000010" // /* MW 2 */
+ 3231 "10000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 302 76
+ 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3233 "00110010" // /* MW 5 */
+ 3234 "00010000" // /* MW 4 */
+ 3235 "00100000" // /* MW 3 */
+ 3236 "10001110" // /* MW 2 */
+ 3237 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3239 "01000111" // /* MW 3 */
+ 3240 "10001000" // /* MW 2 */
+ 3241 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */
+ 3243 "00000001" // /* MW 5 */
+ 3244 "01000000" // /* MW 4 */
+ 3245 "10101000" // /* MW 3 */
+ 3246 "00000110" // /* MW 2 */
+ 3247 "00100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 310 44
+.src_ref 2 "reduce_base_c8.h" 311 38
+.delay_slot
+ 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "01000001" // /* MW 3 */
+ 3250 "00000010" // /* MW 2 */
+ 3251 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3259 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3261 "00011101" // /* MW 3 */
+ 3262 "00000110" // /* MW 2 */
+ 3263 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3265 "00101000" // /* MW 3 */
+ 3266 "11000100" // /* MW 2 */
+ 3267 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 3269 "00000001" // /* MW 5 */
+ 3270 "01000000" // /* MW 4 */
+ 3271 "11110000" // /* MW 3 */
+ 3272 "00000110" // /* MW 2 */
+ 3273 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3275 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3283 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 310 44 first
+.src_ref 2 "reduce_base_c8.h" 312 41 first
+.src_ref 2 "reduce_base_c8.h" 315 40
+ 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3285 "01011000" // /* MW 9 */
+ 3286 "11101100" // /* MW 8 */
+ 3287 "00000111" // /* MW 7 */
+ 3288 "11111000" // /* MW 6 */
+ 3289 "00101111" // /* MW 5 */
+ 3290 "00100110" // /* MW 4 */
+ 3291 "11100000" // /* MW 3 */
+ 3292 "10000110" // /* MW 2 */
+ 3293 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38
+.src_ref 2 "reduce_base_c8.h" 317 97
+ 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3295 "00010000" // /* MW 9 */
+ 3296 "00000000" // /* MW 8 */
+ 3297 "01000000" // /* MW 7 */
+ 3298 "01000000" // /* MW 6 */
+ 3299 "00000000" // /* MW 5 */
+ 3300 "00000000" // /* MW 4 */
+ 3301 "00000000" // /* MW 3 */
+ 3302 "01000011" // /* MW 2 */
+ 3303 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40
+.src_ref 2 "reduce_base_c8.h" 317 97 first
+ 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3305 "00000001" // /* MW 5 */
+ 3306 "00100000" // /* MW 4 */
+ 3307 "10111100" // /* MW 3 */
+ 3308 "11000111" // /* MW 2 */
+ 3309 "11100000" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+ 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3317 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 311 38 first
+ 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3319 "00110111" // /* MW 3 */
+ 3320 "00011100" // /* MW 2 */
+ 3321 "00000100" // /* MW 1 */
+ 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3323 "00000000" // /* MW 1 */
+ 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3325 "00000000" // /* MW 1 */
+ 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3327 "00000000" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 39 first
+ 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3335 "01010111" // /* MW 3 */
+ 3336 "00011100" // /* MW 2 */
+ 3337 "00000100" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+ 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3341 "00000000" // /* MW 1 */
+ 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3343 "00000000" // /* MW 1 */
+ 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3345 "00000000" // /* MW 1 */
+ 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3347 "00000000" // /* MW 1 */
+ 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3349 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38 first
+ 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3351 "01000001" // /* MW 3 */
+ 3352 "00011100" // /* MW 2 */
+ 3353 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40 first
+ 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3355 "00010111" // /* MW 3 */
+ 3356 "00001011" // /* MW 2 */
+ 3357 "00000100" // /* MW 1 */
+ 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3359 "00000000" // /* MW 1 */
+ 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3361 "00000000" // /* MW 1 */
+ 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3363 "00000000" // /* MW 1 */
+ 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3365 "00000000" // /* MW 1 */
+ 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3367 "00000000" // /* MW 1 */
+ 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3369 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 316 38 first
+ 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3371 "01010001" // /* MW 3 */
+ 3372 "00000111" // /* MW 2 */
+ 3373 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 317 38 first
+ 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3375 "01110001" // /* MW 3 */
+ 3376 "00010100" // /* MW 2 */
+ 3377 "00001100" // /* MW 1 */
+ 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3379 "00000000" // /* MW 5 */
+ 3380 "00000000" // /* MW 4 */
+ 3381 "11101000" // /* MW 3 */
+ 3382 "00000110" // /* MW 2 */
+ 3383 "00000000" // /* MW 1 */
+.delay_slot
+ 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3385 "10100000" // /* MW 3 */
+ 3386 "10011111" // /* MW 2 */
+ 3387 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3395 "00011100" // /* MW 13 */
+ 3396 "00000000" // /* MW 12 */
+ 3397 "00000000" // /* MW 11 */
+ 3398 "01010111" // /* MW 10 */
+ 3399 "00011010" // /* MW 9 */
+ 3400 "01000000" // /* MW 8 */
+ 3401 "00000000" // /* MW 7 */
+ 3402 "00000000" // /* MW 6 */
+ 3403 "10110110" // /* MW 5 */
+ 3404 "00000010" // /* MW 4 */
+ 3405 "11110000" // /* MW 3 */
+ 3406 "00101100" // /* MW 2 */
+ 3407 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 301 40 first
+ 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3409 "01011000" // /* MW 9 */
+ 3410 "00010000" // /* MW 8 */
+ 3411 "01001000" // /* MW 7 */
+ 3412 "01110000" // /* MW 6 */
+ 3413 "00101011" // /* MW 5 */
+ 3414 "00000110" // /* MW 4 */
+ 3415 "11100000" // /* MW 3 */
+ 3416 "10000110" // /* MW 2 */
+ 3417 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 306 62
+ 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3419 "01111000" // /* MW 9 */
+ 3420 "00001110" // /* MW 8 */
+ 3421 "11010000" // /* MW 7 */
+ 3422 "10101000" // /* MW 6 */
+ 3423 "01000111" // /* MW 5 */
+ 3424 "00111110" // /* MW 4 */
+ 3425 "10000000" // /* MW 3 */
+ 3426 "10000000" // /* MW 2 */
+ 3427 "11111101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76 first
+ 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3429 "01001101" // /* MW 3 */
+ 3430 "11001000" // /* MW 2 */
+ 3431 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 41
+.src_ref 2 "reduce_base_c8.h" 306 62 first
+ 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3433 "11111111" // /* MW 5 */
+ 3434 "10100100" // /* MW 4 */
+ 3435 "11110001" // /* MW 3 */
+ 3436 "10001101" // /* MW 2 */
+ 3437 "11110111" // /* MW 1 */
+ 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3439 "00000000" // /* MW 1 */
+ 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3441 "00000000" // /* MW 1 */
+ 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3443 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 38 first
+ 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3445 "01110111" // /* MW 3 */
+ 3446 "00011111" // /* MW 2 */
+ 3447 "00000100" // /* MW 1 */
+ 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3449 "00000000" // /* MW 1 */
+ 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3451 "00000000" // /* MW 1 */
+ 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3453 "00000000" // /* MW 1 */
+ 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3455 "00000000" // /* MW 1 */
+ 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3457 "00000000" // /* MW 1 */
+ 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3459 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 39 first
+ 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3461 "10110111" // /* MW 3 */
+ 3462 "00011100" // /* MW 2 */
+ 3463 "00000100" // /* MW 1 */
+ 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3465 "00000000" // /* MW 1 */
+ 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3467 "00000000" // /* MW 1 */
+ 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3469 "00000000" // /* MW 1 */
+ 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3471 "00000000" // /* MW 1 */
+ 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3473 "00000000" // /* MW 1 */
+ 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3475 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 38 first
+ 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3477 "01010111" // /* MW 3 */
+ 3478 "00011100" // /* MW 2 */
+ 3479 "00000100" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+ 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3491 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 39 first
+ 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3493 "01110111" // /* MW 3 */
+ 3494 "00011100" // /* MW 2 */
+ 3495 "00000100" // /* MW 1 */
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+ 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3507 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 303 40 first
+ 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3509 "00110111" // /* MW 3 */
+ 3510 "00001000" // /* MW 2 */
+ 3511 "00000100" // /* MW 1 */
+ 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3513 "00000000" // /* MW 1 */
+ 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3515 "00000000" // /* MW 1 */
+ 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3517 "00000000" // /* MW 1 */
+ 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3519 "00000000" // /* MW 1 */
+ 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3521 "00000000" // /* MW 1 */
+ 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3523 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 304 38 first
+ 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3525 "00110001" // /* MW 3 */
+ 3526 "00000110" // /* MW 2 */
+ 3527 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 38 first
+ 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3529 "01110000" // /* MW 7 */
+ 3530 "10100101" // /* MW 6 */
+ 3531 "00000001" // /* MW 5 */
+ 3532 "00000000" // /* MW 4 */
+ 3533 "00110000" // /* MW 3 */
+ 3534 "11010010" // /* MW 2 */
+ 3535 "10000010" // /* MW 1 */
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3537 "01000000" // /* MW 3 */
+ 3538 "10000000" // /* MW 2 */
+ 3539 "00011000" // /* MW 1 */
+ 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3541 "10000001" // /* MW 11 */
+ 3542 "10101101" // /* MW 10 */
+ 3543 "00000000" // /* MW 9 */
+ 3544 "00000000" // /* MW 8 */
+ 3545 "00000000" // /* MW 7 */
+ 3546 "00000000" // /* MW 6 */
+ 3547 "00100000" // /* MW 5 */
+ 3548 "00000000" // /* MW 4 */
+ 3549 "11100000" // /* MW 3 */
+ 3550 "01111010" // /* MW 2 */
+ 3551 "01100000" // /* MW 1 */
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 326 79 first
+.src_ref 2 "reduce_base_c8.h" 329 51
+ 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3553 "00011110" // /* MW 5 */
+ 3554 "11000000" // /* MW 4 */
+ 3555 "10000000" // /* MW 3 */
+ 3556 "00001000" // /* MW 2 */
+ 3557 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 26
+.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first
+ 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3559 "01011000" // /* MW 9 */
+ 3560 "11100010" // /* MW 8 */
+ 3561 "00000111" // /* MW 7 */
+ 3562 "00001000" // /* MW 6 */
+ 3563 "00000010" // /* MW 5 */
+ 3564 "00000000" // /* MW 4 */
+ 3565 "11010000" // /* MW 3 */
+ 3566 "10001010" // /* MW 2 */
+ 3567 "01000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3569 "10011001" // /* MW 5 */
+ 3570 "00000000" // /* MW 4 */
+ 3571 "01010010" // /* MW 3 */
+ 3572 "10001110" // /* MW 2 */
+ 3573 "01000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first
+ 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3575 "00110110" // /* MW 3 */
+ 3576 "00010100" // /* MW 2 */
+ 3577 "00000010" // /* MW 1 */
+ 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3579 "00000000" // /* MW 1 */
+ 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3581 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 326 28 first
+ 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3583 "11110111" // /* MW 3 */
+ 3584 "00101111" // /* MW 2 */
+ 3585 "00000000" // /* MW 1 */
+ 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3587 "00000000" // /* MW 1 */
+ 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3589 "00000000" // /* MW 1 */
+ 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3591 "00000000" // /* MW 1 */
+ 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3593 "00000000" // /* MW 1 */
+ 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3595 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3597 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3599 "00010111" // /* MW 3 */
+ 3600 "01011111" // /* MW 2 */
+ 3601 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3603 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3605 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3607 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3609 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31
+.src_ref 2 "reduce_base_c8.h" 328 23
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00000001" // /* MW 3 */
+ 3612 "00110000" // /* MW 2 */
+ 3613 "00010000" // /* MW 1 */
+ 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3615 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 328 23 first
+ 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3617 "00010111" // /* MW 3 */
+ 3618 "11001111" // /* MW 2 */
+ 3619 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 51 first
+ 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3621 "10011010" // /* MW 3 */
+ 3622 "01001000" // /* MW 2 */
+ 3623 "00000000" // /* MW 1 */
+ 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3625 "00000000" // /* MW 1 */
+ 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3627 "00000000" // /* MW 1 */
+ 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3629 "00000000" // /* MW 1 */
+ 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 28
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3635 "00010111" // /* MW 3 */
+ 3636 "11111100" // /* MW 2 */
+ 3637 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 28
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3639 "11111111" // /* MW 5 */
+ 3640 "10100100" // /* MW 4 */
+ 3641 "10110010" // /* MW 3 */
+ 3642 "00110101" // /* MW 2 */
+ 3643 "00100001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3645 "01000001" // /* MW 3 */
+ 3646 "00000000" // /* MW 2 */
+ 3647 "00010000" // /* MW 1 */
+ 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3649 "00000000" // /* MW 1 */
+ 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3651 "00000000" // /* MW 1 */
+ 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3653 "00000000" // /* MW 1 */
+ 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3655 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 330 26 first
+ 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3657 "10110111" // /* MW 3 */
+ 3658 "00001000" // /* MW 2 */
+ 3659 "00000000" // /* MW 1 */
+ 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3661 "00000000" // /* MW 1 */
+ 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3663 "00000000" // /* MW 1 */
+ 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3665 "00000000" // /* MW 1 */
+ 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3667 "00000000" // /* MW 1 */
+ 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3669 "00000000" // /* MW 1 */
+ 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3671 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24 first
+ 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3673 "01110111" // /* MW 3 */
+ 3674 "00101010" // /* MW 2 */
+ 3675 "00000000" // /* MW 1 */
+ 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3677 "00000000" // /* MW 1 */
+ 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3679 "00000000" // /* MW 1 */
+ 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3681 "00000000" // /* MW 1 */
+ 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3683 "00000000" // /* MW 1 */
+ 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3685 "00000000" // /* MW 1 */
+ 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3687 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first
+ 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3689 "01000111" // /* MW 3 */
+ 3690 "11101100" // /* MW 2 */
+ 3691 "00000000" // /* MW 1 */
+ 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3693 "00000000" // /* MW 1 */
+ 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3695 "00000000" // /* MW 1 */
+ 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3697 "00000000" // /* MW 1 */
+ 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3699 "00000000" // /* MW 1 */
+ 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3701 "00000000" // /* MW 1 */
+ 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3703 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3705 "01110111" // /* MW 3 */
+ 3706 "00000100" // /* MW 2 */
+ 3707 "00000000" // /* MW 1 */
+ 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3709 "00000000" // /* MW 1 */
+ 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3711 "00000000" // /* MW 1 */
+ 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3713 "00000000" // /* MW 1 */
+ 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3715 "00000000" // /* MW 1 */
+ 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3717 "00000000" // /* MW 1 */
+ 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3719 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first
+ 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3721 "00100111" // /* MW 3 */
+ 3722 "11100100" // /* MW 2 */
+ 3723 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4 first
+ 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 3725 "00000000" // /* MW 3 */
+ 3726 "00101000" // /* MW 2 */
+ 3727 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4
+.delay_slot
+ 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3729 "00000001" // /* MW 5 */
+ 3730 "00000000" // /* MW 4 */
+ 3731 "00000000" // /* MW 3 */
+ 3732 "11111000" // /* MW 2 */
+ 3733 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3735 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3737 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3739 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3741 "01100111" // /* MW 3 */
+ 3742 "00000001" // /* MW 2 */
+ 3743 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+.src_ref 2 "reduce_base_c8.h" 262 44 first
+.src_ref 2 "reduce_base_c8.h" 263 77
+ 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3745 "00010000" // /* MW 9 */
+ 3746 "11110100" // /* MW 8 */
+ 3747 "10101111" // /* MW 7 */
+ 3748 "00111100" // /* MW 6 */
+ 3749 "00000000" // /* MW 5 */
+ 3750 "00000000" // /* MW 4 */
+ 3751 "11100000" // /* MW 3 */
+ 3752 "11010110" // /* MW 2 */
+ 3753 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.src_ref 2 "reduce_base_c8.h" 263 77 first
+.src_ref 2 "reduce_base_c8.h" 267 40
+ 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3755 "01011000" // /* MW 9 */
+ 3756 "11101100" // /* MW 8 */
+ 3757 "00000111" // /* MW 7 */
+ 3758 "00000100" // /* MW 6 */
+ 3759 "01111101" // /* MW 5 */
+ 3760 "00001010" // /* MW 4 */
+ 3761 "00100000" // /* MW 3 */
+ 3762 "10001010" // /* MW 2 */
+ 3763 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3765 "10010000" // /* MW 9 */
+ 3766 "11111111" // /* MW 8 */
+ 3767 "11001111" // /* MW 7 */
+ 3768 "00111100" // /* MW 6 */
+ 3769 "00000000" // /* MW 5 */
+ 3770 "00000000" // /* MW 4 */
+ 3771 "00000000" // /* MW 3 */
+ 3772 "10011010" // /* MW 2 */
+ 3773 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118 first
+ 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3775 "01100000" // /* MW 3 */
+ 3776 "11100010" // /* MW 2 */
+ 3777 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 98
+.src_ref 2 "reduce_base_c8.h" 267 116 first
+ 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3779 "01000110" // /* MW 3 */
+ 3780 "01111010" // /* MW 2 */
+ 3781 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 60 first
+.src_ref 2 "reduce_base_c8.h" 265 98 first
+ 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3783 "01001110" // /* MW 3 */
+ 3784 "01101010" // /* MW 2 */
+ 3785 "00010100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3787 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 38 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3789 "01010111" // /* MW 3 */
+ 3790 "00011100" // /* MW 2 */
+ 3791 "00000100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3793 "00101111" // /* MW 3 */
+ 3794 "11000100" // /* MW 2 */
+ 3795 "00010001" // /* MW 1 */
+ 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3797 "00000000" // /* MW 1 */
+ 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3799 "00000000" // /* MW 1 */
+ 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3801 "00000000" // /* MW 1 */
+ 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3803 "00000000" // /* MW 1 */
+ 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3805 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 264 39 first
+ 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3807 "11010111" // /* MW 3 */
+ 3808 "00011110" // /* MW 2 */
+ 3809 "00000100" // /* MW 1 */
+ 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3811 "00000000" // /* MW 1 */
+ 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3813 "00000000" // /* MW 1 */
+ 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3815 "00000000" // /* MW 1 */
+ 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3817 "00000000" // /* MW 1 */
+ 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3819 "00000000" // /* MW 1 */
+ 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3821 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 38 first
+ 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3823 "10110111" // /* MW 3 */
+ 3824 "00011110" // /* MW 2 */
+ 3825 "00000100" // /* MW 1 */
+ 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3827 "00000000" // /* MW 1 */
+ 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3829 "00000000" // /* MW 1 */
+ 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3831 "00000000" // /* MW 1 */
+ 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3833 "00000000" // /* MW 1 */
+ 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3835 "00000000" // /* MW 1 */
+ 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3837 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 266 39 first
+ 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3839 "00110111" // /* MW 3 */
+ 3840 "00011100" // /* MW 2 */
+ 3841 "00000100" // /* MW 1 */
+ 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3843 "00000000" // /* MW 1 */
+ 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3845 "00000000" // /* MW 1 */
+ 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3847 "00000000" // /* MW 1 */
+ 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3849 "00000000" // /* MW 1 */
+ 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3851 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3853 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 40 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3855 "01010111" // /* MW 3 */
+ 3856 "00001000" // /* MW 2 */
+ 3857 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3859 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3861 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3863 "00000000" // /* MW 5 */
+ 3864 "00000000" // /* MW 4 */
+ 3865 "11101000" // /* MW 3 */
+ 3866 "00000110" // /* MW 2 */
+ 3867 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3869 "01000001" // /* MW 3 */
+ 3870 "00001010" // /* MW 2 */
+ 3871 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3873 "11010001" // /* MW 3 */
+ 3874 "01000101" // /* MW 2 */
+ 3875 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 270 64
+.delay_slot
+ 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3877 "00011100" // /* MW 3 */
+ 3878 "10100001" // /* MW 2 */
+ 3879 "00011001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 268 38 first
+.delay_slot
+ 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3881 "01110000" // /* MW 7 */
+ 3882 "10100101" // /* MW 6 */
+ 3883 "00000001" // /* MW 5 */
+ 3884 "00000000" // /* MW 4 */
+ 3885 "00110000" // /* MW 3 */
+ 3886 "10001110" // /* MW 2 */
+ 3887 "10000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 269 38 first
+.src_ref 2 "reduce_base_c8.h" 270 64 first
+.delay_slot
+ 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3889 "00000000" // /* MW 15 */
+ 3890 "00000000" // /* MW 14 */
+ 3891 "01111000" // /* MW 13 */
+ 3892 "10100101" // /* MW 12 */
+ 3893 "00000001" // /* MW 11 */
+ 3894 "01111100" // /* MW 10 */
+ 3895 "11100011" // /* MW 9 */
+ 3896 "10111101" // /* MW 8 */
+ 3897 "00010001" // /* MW 7 */
+ 3898 "00010110" // /* MW 6 */
+ 3899 "00100100" // /* MW 5 */
+ 3900 "00000000" // /* MW 4 */
+ 3901 "11110000" // /* MW 3 */
+ 3902 "00101100" // /* MW 2 */
+ 3903 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+.src_ref 2 "reduce_base_c8.h" 250 44
+.src_ref 2 "reduce_base_c8.h" 250 44 first
+.src_ref 2 "reduce_base_c8.h" 255 40
+ 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "01011000" // /* MW 9 */
+ 3906 "11101100" // /* MW 8 */
+ 3907 "00000111" // /* MW 7 */
+ 3908 "00001000" // /* MW 6 */
+ 3909 "01000010" // /* MW 5 */
+ 3910 "00000000" // /* MW 4 */
+ 3911 "11100000" // /* MW 3 */
+ 3912 "10010010" // /* MW 2 */
+ 3913 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113 first
+ 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "01011000" // /* MW 9 */
+ 3916 "00001000" // /* MW 8 */
+ 3917 "01001000" // /* MW 7 */
+ 3918 "01110000" // /* MW 6 */
+ 3919 "00101101" // /* MW 5 */
+ 3920 "00000110" // /* MW 4 */
+ 3921 "00100000" // /* MW 3 */
+ 3922 "10000110" // /* MW 2 */
+ 3923 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3925 "00010001" // /* MW 5 */
+ 3926 "00100000" // /* MW 4 */
+ 3927 "00101101" // /* MW 3 */
+ 3928 "11001000" // /* MW 2 */
+ 3929 "00000000" // /* MW 1 */
+ 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3931 "00000000" // /* MW 1 */
+ 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3933 "00000000" // /* MW 1 */
+ 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3935 "00000000" // /* MW 1 */
+ 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3937 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 251 38 first
+ 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3939 "01110111" // /* MW 3 */
+ 3940 "00011111" // /* MW 2 */
+ 3941 "00000100" // /* MW 1 */
+ 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3943 "00000000" // /* MW 1 */
+ 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3945 "00000000" // /* MW 1 */
+ 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3947 "00000000" // /* MW 1 */
+ 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3949 "00000000" // /* MW 1 */
+ 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3951 "00000000" // /* MW 1 */
+ 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 252 39 first
+ 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3955 "10110111" // /* MW 3 */
+ 3956 "00011100" // /* MW 2 */
+ 3957 "00000100" // /* MW 1 */
+ 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3959 "00000000" // /* MW 1 */
+ 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3961 "00000000" // /* MW 1 */
+ 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3963 "00000000" // /* MW 1 */
+ 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3965 "00000000" // /* MW 1 */
+ 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3967 "00000000" // /* MW 1 */
+ 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3969 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 253 38 first
+ 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3971 "01110111" // /* MW 3 */
+ 3972 "00011111" // /* MW 2 */
+ 3973 "00000100" // /* MW 1 */
+ 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3975 "00000000" // /* MW 1 */
+ 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3977 "00000000" // /* MW 1 */
+ 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3979 "00000000" // /* MW 1 */
+ 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3981 "00000000" // /* MW 1 */
+ 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3983 "00000000" // /* MW 1 */
+ 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3985 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 254 39 first
+ 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3987 "11010111" // /* MW 3 */
+ 3988 "00011110" // /* MW 2 */
+ 3989 "00000100" // /* MW 1 */
+ 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3991 "00000000" // /* MW 1 */
+ 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3993 "00000000" // /* MW 1 */
+ 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3995 "00000000" // /* MW 1 */
+ 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3997 "00000000" // /* MW 1 */
+ 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3999 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 40 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4003 "01110111" // /* MW 3 */
+ 4004 "00001000" // /* MW 2 */
+ 4005 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4007 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4009 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4011 "00000000" // /* MW 5 */
+ 4012 "00000000" // /* MW 4 */
+ 4013 "11101000" // /* MW 3 */
+ 4014 "00000110" // /* MW 2 */
+ 4015 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4017 "00100110" // /* MW 3 */
+ 4018 "01000110" // /* MW 2 */
+ 4019 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4023 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 256 38 first
+.delay_slot
+ 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4025 "11010001" // /* MW 3 */
+ 4026 "00000100" // /* MW 2 */
+ 4027 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 257 38 first
+.delay_slot
+ 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4029 "01010001" // /* MW 3 */
+ 4030 "00010110" // /* MW 2 */
+ 4031 "00001100" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+.src_ref 2 "reduce_base_c8.h" 238 44 first
+ 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4033 "10110111" // /* MW 3 */
+ 4034 "00011110" // /* MW 2 */
+ 4035 "00000100" // /* MW 1 */
+ 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4037 "00000000" // /* MW 1 */
+ 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4039 "00000000" // /* MW 1 */
+ 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4041 "00000000" // /* MW 1 */
+ 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4043 "00000000" // /* MW 1 */
+ 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4045 "00000000" // /* MW 1 */
+ 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4047 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 239 38 first
+ 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4049 "11110111" // /* MW 3 */
+ 4050 "00011100" // /* MW 2 */
+ 4051 "00000100" // /* MW 1 */
+ 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4053 "00000000" // /* MW 1 */
+ 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4055 "00000000" // /* MW 1 */
+ 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4057 "00000000" // /* MW 1 */
+ 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4059 "00000000" // /* MW 1 */
+ 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4061 "00000000" // /* MW 1 */
+ 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4063 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 240 39 first
+ 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4065 "11110111" // /* MW 3 */
+ 4066 "00011110" // /* MW 2 */
+ 4067 "00000100" // /* MW 1 */
+ 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4069 "00000000" // /* MW 1 */
+ 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4071 "00000000" // /* MW 1 */
+ 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4073 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+ 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4075 "01110001" // /* MW 3 */
+ 4076 "11111100" // /* MW 2 */
+ 4077 "00000111" // /* MW 1 */
+ 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4079 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4081 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 38 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4083 "00110111" // /* MW 3 */
+ 4084 "00011100" // /* MW 2 */
+ 4085 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4087 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4089 "11000000" // /* MW 5 */
+ 4090 "10111111" // /* MW 4 */
+ 4091 "11110000" // /* MW 3 */
+ 4092 "00000000" // /* MW 2 */
+ 4093 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4095 "10100000" // /* MW 3 */
+ 4096 "01000101" // /* MW 2 */
+ 4097 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4099 "01000001" // /* MW 5 */
+ 4100 "10100000" // /* MW 4 */
+ 4101 "11000000" // /* MW 3 */
+ 4102 "01000100" // /* MW 2 */
+ 4103 "00011000" // /* MW 1 */
+ 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4105 "00000000" // /* MW 1 */
+ 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4107 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 39 first
+ 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4109 "11010111" // /* MW 3 */
+ 4110 "00011110" // /* MW 2 */
+ 4111 "00000100" // /* MW 1 */
+ 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4113 "00000000" // /* MW 1 */
+ 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4115 "00000000" // /* MW 1 */
+ 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4117 "00000000" // /* MW 1 */
+ 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4119 "00000000" // /* MW 1 */
+ 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4121 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4123 "11011000" // /* MW 3 */
+ 4124 "00001111" // /* MW 2 */
+ 4125 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40 first
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4127 "10110111" // /* MW 3 */
+ 4128 "00001000" // /* MW 2 */
+ 4129 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4131 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4133 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4135 "00000000" // /* MW 5 */
+ 4136 "00000000" // /* MW 4 */
+ 4137 "11101000" // /* MW 3 */
+ 4138 "00000110" // /* MW 2 */
+ 4139 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 91
+.src_ref 2 "reduce_base_c8.h" 243 91
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4141 "01000001" // /* MW 5 */
+ 4142 "10100000" // /* MW 4 */
+ 4143 "11000010" // /* MW 3 */
+ 4144 "01001001" // /* MW 2 */
+ 4145 "10110001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 244 38 first
+.delay_slot
+ 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4151 "10010001" // /* MW 3 */
+ 4152 "00000110" // /* MW 2 */
+ 4153 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 245 38 first
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4155 "01111001" // /* MW 9 */
+ 4156 "10001110" // /* MW 8 */
+ 4157 "11010000" // /* MW 7 */
+ 4158 "10001011" // /* MW 6 */
+ 4159 "10100000" // /* MW 5 */
+ 4160 "00000001" // /* MW 4 */
+ 4161 "00110000" // /* MW 3 */
+ 4162 "11000110" // /* MW 2 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+ 4163 "10000010" // /* MW 1 */
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.src_ref 3 "pad_3d.h" 266 first
+.src_ref 3 "pad_3d.h" 465 37 first
+.src_ref 3 "pad_3d.h" 468 21 first
+.src_ref 3 "pad_3d.h" 471 29
+.src_ref 3 "pad_3d.h" 479 21
+.function_start
+ 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4177 "01011000" // /* MW 9 */
+ 4178 "11101000" // /* MW 8 */
+ 4179 "10000111" // /* MW 7 */
+ 4180 "11001000" // /* MW 6 */
+ 4181 "01000111" // /* MW 5 */
+ 4182 "00111110" // /* MW 4 */
+ 4183 "11010000" // /* MW 3 */
+ 4184 "10000010" // /* MW 2 */
+ 4185 "01000010" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 469 21 first
+.src_ref 3 "pad_3d.h" 478 21
+.src_ref 3 "pad_3d.h" 499 52
+.src_ref 3 "pad_3d.h" 511 25
+ 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4187 "01011000" // /* MW 9 */
+ 4188 "00000110" // /* MW 8 */
+ 4189 "00001000" // /* MW 7 */
+ 4190 "10101010" // /* MW 6 */
+ 4191 "00100111" // /* MW 5 */
+ 4192 "00111110" // /* MW 4 */
+ 4193 "11010000" // /* MW 3 */
+ 4194 "10000110" // /* MW 2 */
+ 4195 "01000101" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 470 21 first
+.src_ref 3 "pad_3d.h" 486 26
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 26
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22
+ 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4197 "01111000" // /* MW 9 */
+ 4198 "01100000" // /* MW 8 */
+ 4199 "01101000" // /* MW 7 */
+ 4200 "00001000" // /* MW 6 */
+ 4201 "10000000" // /* MW 5 */
+ 4202 "00000001" // /* MW 4 */
+ 4203 "11010000" // /* MW 3 */
+ 4204 "10010110" // /* MW 2 */
+ 4205 "01001111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 471 29 first
+ 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4207 "01010010" // /* MW 3 */
+ 4208 "00101010" // /* MW 2 */
+ 4209 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 472 25 first
+ 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4211 "11010110" // /* MW 3 */
+ 4212 "00011100" // /* MW 2 */
+ 4213 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 473 26 first
+ 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4215 "11110110" // /* MW 3 */
+ 4216 "00101100" // /* MW 2 */
+ 4217 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 475 24 first
+ 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4219 "00110110" // /* MW 3 */
+ 4220 "00000110" // /* MW 2 */
+ 4221 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 479 21 first
+ 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4223 "01001110" // /* MW 3 */
+ 4224 "00100110" // /* MW 2 */
+ 4225 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 477 23 first
+ 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4227 "10010110" // /* MW 3 */
+ 4228 "00100100" // /* MW 2 */
+ 4229 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 478 21 first
+ 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4231 "00101110" // /* MW 3 */
+ 4232 "01101000" // /* MW 2 */
+ 4233 "00010001" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 56 25 first
+ 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4235 "01110010" // /* MW 3 */
+ 4236 "01001001" // /* MW 2 */
+ 4237 "00011000" // /* MW 1 */
+ 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4239 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 45 first
+ 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4241 "01001111" // /* MW 3 */
+ 4242 "11100101" // /* MW 2 */
+ 4243 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 34
+ 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4245 "00010001" // /* MW 3 */
+ 4246 "01100111" // /* MW 2 */
+ 4247 "00010000" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 998 25 first
+ 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4249 "00101111" // /* MW 3 */
+ 4250 "11100111" // /* MW 2 */
+ 4251 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 43 first
+ 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4253 "00101111" // /* MW 3 */
+ 4254 "01100011" // /* MW 2 */
+ 4255 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13 first
+ 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4257 "00001101" // /* MW 3 */
+ 4258 "11100001" // /* MW 2 */
+ 4259 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 486 26 first
+ 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4261 "10000010" // /* MW 5 */
+ 4262 "11000011" // /* MW 4 */
+ 4263 "00110100" // /* MW 3 */
+ 4264 "00100011" // /* MW 2 */
+ 4265 "11000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */
+ 4267 "00000001" // /* MW 5 */
+ 4268 "01000000" // /* MW 4 */
+ 4269 "10100000" // /* MW 3 */
+ 4270 "00001000" // /* MW 2 */
+ 4271 "10000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 487 22
+.delay_slot
+ 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "10010010" // /* MW 3 */
+ 4274 "00000000" // /* MW 2 */
+ 4275 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4283 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4285 "01100000" // /* MW 5 */
+ 4286 "11100010" // /* MW 4 */
+ 4287 "00010001" // /* MW 3 */
+ 4288 "00000000" // /* MW 2 */
+ 4289 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4291 "01100000" // /* MW 5 */
+ 4292 "11100010" // /* MW 4 */
+ 4293 "00010110" // /* MW 3 */
+ 4294 "00000000" // /* MW 2 */
+ 4295 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4297 "01110000" // /* MW 7 */
+ 4298 "01010000" // /* MW 6 */
+ 4299 "10111100" // /* MW 5 */
+ 4300 "00000010" // /* MW 4 */
+ 4301 "01100000" // /* MW 3 */
+ 4302 "00101011" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+ 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "10100101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00100000" // /* MW 5 */
+ 4316 "00000000" // /* MW 4 */
+ 4317 "11110000" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "10100101" // /* MW 12 */
+ 4325 "00000001" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+ 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00100000" // /* MW 5 */
+ 4348 "00000000" // /* MW 4 */
+ 4349 "11110000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+ 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "01011011" // /* MW 7 */
+ 4362 "00000001" // /* MW 6 */
+ 4363 "00100000" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+ 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "10100101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "01011011" // /* MW 7 */
+ 4378 "00000001" // /* MW 6 */
+ 4379 "00100000" // /* MW 5 */
+ 4380 "00000000" // /* MW 4 */
+ 4381 "11110000" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+ 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "10100101" // /* MW 12 */
+ 4389 "00000001" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "01011011" // /* MW 7 */
+ 4394 "00000001" // /* MW 6 */
+ 4395 "00100000" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.src_ref 3 "pad_3d.h" 487 22 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4401 "00000000" // /* MW 15 */
+ 4402 "00000000" // /* MW 14 */
+ 4403 "01111000" // /* MW 13 */
+ 4404 "10100101" // /* MW 12 */
+ 4405 "00000001" // /* MW 11 */
+ 4406 "00000000" // /* MW 10 */
+ 4407 "00000000" // /* MW 9 */
+ 4408 "10000000" // /* MW 8 */
+ 4409 "00000110" // /* MW 7 */
+ 4410 "00011100" // /* MW 6 */
+ 4411 "00100010" // /* MW 5 */
+ 4412 "00000000" // /* MW 4 */
+ 4413 "11110000" // /* MW 3 */
+ 4414 "00101100" // /* MW 2 */
+ 4415 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.src_ref 3 "pad_3d.h" 495 21
+.src_ref 3 "pad_3d.h" 495 40 first
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 38 first
+.loop_nesting 0
+ 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4417 "10101000" // /* MW 9 */
+ 4418 "11001100" // /* MW 8 */
+ 4419 "00101001" // /* MW 7 */
+ 4420 "11111110" // /* MW 6 */
+ 4421 "00000000" // /* MW 5 */
+ 4422 "00001011" // /* MW 4 */
+ 4423 "00000000" // /* MW 3 */
+ 4424 "10000110" // /* MW 2 */
+ 4425 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 40
+.src_ref 3 "pad_3d.h" 496 29 first
+ 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4427 "11111111" // /* MW 5 */
+ 4428 "10000111" // /* MW 4 */
+ 4429 "00110010" // /* MW 3 */
+ 4430 "01100010" // /* MW 2 */
+ 4431 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 21 first
+ 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4433 "01101101" // /* MW 3 */
+ 4434 "01100010" // /* MW 2 */
+ 4435 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 58
+.src_ref 3 "pad_3d.h" 498 23 first
+ 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4437 "00010000" // /* MW 5 */
+ 4438 "00010001" // /* MW 4 */
+ 4439 "00110010" // /* MW 3 */
+ 4440 "01001110" // /* MW 2 */
+ 4441 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 45 first
+ 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4443 "00001111" // /* MW 3 */
+ 4444 "11100001" // /* MW 2 */
+ 4445 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10 first
+ 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4447 "01101101" // /* MW 3 */
+ 4448 "01001100" // /* MW 2 */
+ 4449 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 52 first
+ 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4451 "00110010" // /* MW 5 */
+ 4452 "11000011" // /* MW 4 */
+ 4453 "11010100" // /* MW 3 */
+ 4454 "10000101" // /* MW 2 */
+ 4455 "10000001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 26
+ 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4457 "01101001" // /* MW 3 */
+ 4458 "00001110" // /* MW 2 */
+ 4459 "00010110" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */
+ 4461 "00000001" // /* MW 5 */
+ 4462 "01000000" // /* MW 4 */
+ 4463 "00001000" // /* MW 3 */
+ 4464 "00001001" // /* MW 2 */
+ 4465 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4475 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4477 "00010000" // /* MW 9 */
+ 4478 "00000000" // /* MW 8 */
+ 4479 "01111001" // /* MW 7 */
+ 4480 "00000100" // /* MW 6 */
+ 4481 "00000000" // /* MW 5 */
+ 4482 "00000000" // /* MW 4 */
+ 4483 "10000000" // /* MW 3 */
+ 4484 "00000111" // /* MW 2 */
+ 4485 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4487 "00010000" // /* MW 9 */
+ 4488 "00000000" // /* MW 8 */
+ 4489 "10111001" // /* MW 7 */
+ 4490 "00000101" // /* MW 6 */
+ 4491 "00000000" // /* MW 5 */
+ 4492 "00000000" // /* MW 4 */
+ 4493 "10000000" // /* MW 3 */
+ 4494 "00000110" // /* MW 2 */
+ 4495 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4497 "00000000" // /* MW 15 */
+ 4498 "00000000" // /* MW 14 */
+ 4499 "01111000" // /* MW 13 */
+ 4500 "10010000" // /* MW 12 */
+ 4501 "10111001" // /* MW 11 */
+ 4502 "00000010" // /* MW 10 */
+ 4503 "00000000" // /* MW 9 */
+ 4504 "00000000" // /* MW 8 */
+ 4505 "01011011" // /* MW 7 */
+ 4506 "00000001" // /* MW 6 */
+ 4507 "00100000" // /* MW 5 */
+ 4508 "00000000" // /* MW 4 */
+ 4509 "11110000" // /* MW 3 */
+ 4510 "00101100" // /* MW 2 */
+ 4511 "00000000" // /* MW 1 */
+ 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4513 "00000000" // /* MW 15 */
+ 4514 "00000000" // /* MW 14 */
+ 4515 "01111000" // /* MW 13 */
+ 4516 "10100101" // /* MW 12 */
+ 4517 "00000001" // /* MW 11 */
+ 4518 "00000000" // /* MW 10 */
+ 4519 "00000000" // /* MW 9 */
+ 4520 "00000000" // /* MW 8 */
+ 4521 "01011011" // /* MW 7 */
+ 4522 "00000001" // /* MW 6 */
+ 4523 "00100000" // /* MW 5 */
+ 4524 "00000000" // /* MW 4 */
+ 4525 "11110000" // /* MW 3 */
+ 4526 "00101100" // /* MW 2 */
+ 4527 "00000000" // /* MW 1 */
+ 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4529 "00000000" // /* MW 15 */
+ 4530 "00000000" // /* MW 14 */
+ 4531 "01111000" // /* MW 13 */
+ 4532 "10100101" // /* MW 12 */
+ 4533 "00000001" // /* MW 11 */
+ 4534 "00000000" // /* MW 10 */
+ 4535 "00000000" // /* MW 9 */
+ 4536 "00000000" // /* MW 8 */
+ 4537 "01011011" // /* MW 7 */
+ 4538 "00000001" // /* MW 6 */
+ 4539 "00100000" // /* MW 5 */
+ 4540 "00000000" // /* MW 4 */
+ 4541 "11110000" // /* MW 3 */
+ 4542 "00101100" // /* MW 2 */
+ 4543 "00000000" // /* MW 1 */
+ 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4545 "00000000" // /* MW 15 */
+ 4546 "00000000" // /* MW 14 */
+ 4547 "01111000" // /* MW 13 */
+ 4548 "10100101" // /* MW 12 */
+ 4549 "00000001" // /* MW 11 */
+ 4550 "00000000" // /* MW 10 */
+ 4551 "00000000" // /* MW 9 */
+ 4552 "00000000" // /* MW 8 */
+ 4553 "01011011" // /* MW 7 */
+ 4554 "00000001" // /* MW 6 */
+ 4555 "00100000" // /* MW 5 */
+ 4556 "00000000" // /* MW 4 */
+ 4557 "11110000" // /* MW 3 */
+ 4558 "00101100" // /* MW 2 */
+ 4559 "00000000" // /* MW 1 */
+ 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4561 "00000000" // /* MW 15 */
+ 4562 "00000000" // /* MW 14 */
+ 4563 "01111000" // /* MW 13 */
+ 4564 "10100101" // /* MW 12 */
+ 4565 "00000001" // /* MW 11 */
+ 4566 "00000000" // /* MW 10 */
+ 4567 "00000000" // /* MW 9 */
+ 4568 "00000000" // /* MW 8 */
+ 4569 "01011011" // /* MW 7 */
+ 4570 "00000001" // /* MW 6 */
+ 4571 "00100000" // /* MW 5 */
+ 4572 "00000000" // /* MW 4 */
+ 4573 "11110000" // /* MW 3 */
+ 4574 "00101100" // /* MW 2 */
+ 4575 "00000000" // /* MW 1 */
+ 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4577 "00000000" // /* MW 15 */
+ 4578 "00000000" // /* MW 14 */
+ 4579 "01111000" // /* MW 13 */
+ 4580 "10100101" // /* MW 12 */
+ 4581 "00000001" // /* MW 11 */
+ 4582 "00000000" // /* MW 10 */
+ 4583 "00000000" // /* MW 9 */
+ 4584 "00000000" // /* MW 8 */
+ 4585 "01011011" // /* MW 7 */
+ 4586 "00000001" // /* MW 6 */
+ 4587 "00100000" // /* MW 5 */
+ 4588 "00000000" // /* MW 4 */
+ 4589 "11110000" // /* MW 3 */
+ 4590 "00101100" // /* MW 2 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4593 "00000000" // /* MW 15 */
+ 4594 "00000000" // /* MW 14 */
+ 4595 "01111000" // /* MW 13 */
+ 4596 "10100101" // /* MW 12 */
+ 4597 "00000001" // /* MW 11 */
+ 4598 "00000000" // /* MW 10 */
+ 4599 "00000000" // /* MW 9 */
+ 4600 "00000000" // /* MW 8 */
+ 4601 "01011011" // /* MW 7 */
+ 4602 "00000001" // /* MW 6 */
+ 4603 "00100000" // /* MW 5 */
+ 4604 "00000000" // /* MW 4 */
+ 4605 "11110000" // /* MW 3 */
+ 4606 "00101100" // /* MW 2 */
+ 4607 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4609 "00000000" // /* MW 15 */
+ 4610 "00000000" // /* MW 14 */
+ 4611 "01111000" // /* MW 13 */
+ 4612 "10100101" // /* MW 12 */
+ 4613 "00000001" // /* MW 11 */
+ 4614 "00000000" // /* MW 10 */
+ 4615 "00000000" // /* MW 9 */
+ 4616 "00000000" // /* MW 8 */
+ 4617 "00101110" // /* MW 7 */
+ 4618 "00110000" // /* MW 6 */
+ 4619 "00100010" // /* MW 5 */
+ 4620 "00000000" // /* MW 4 */
+ 4621 "11110000" // /* MW 3 */
+ 4622 "00101100" // /* MW 2 */
+ 4623 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.src_ref 3 "pad_3d.h" 514 39
+.loop_nesting 0
+ 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "11110000" // /* MW 5 */
+ 4626 "10111111" // /* MW 4 */
+ 4627 "11110011" // /* MW 3 */
+ 4628 "11111111" // /* MW 2 */
+ 4629 "01111111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 39 first
+ 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "01000100" // /* MW 3 */
+ 4632 "11001110" // /* MW 2 */
+ 4633 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 35
+ 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "01110001" // /* MW 3 */
+ 4636 "01001110" // /* MW 2 */
+ 4637 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+ 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4639 "00001111" // /* MW 3 */
+ 4640 "11001110" // /* MW 2 */
+ 4641 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 511 25 first
+ 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4643 "00101110" // /* MW 3 */
+ 4644 "00000100" // /* MW 2 */
+ 4645 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 36 first
+ 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4647 "01000001" // /* MW 3 */
+ 4648 "01001000" // /* MW 2 */
+ 4649 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 30 first
+ 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4651 "00001111" // /* MW 3 */
+ 4652 "10000100" // /* MW 2 */
+ 4653 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 28 first
+ 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4655 "00001111" // /* MW 3 */
+ 4656 "00000000" // /* MW 2 */
+ 4657 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 514 55
+.src_ref 3 "pad_3d.h" 517 39 first
+ 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4659 "00000101" // /* MW 5 */
+ 4660 "00100000" // /* MW 4 */
+ 4661 "11110011" // /* MW 3 */
+ 4662 "01000101" // /* MW 2 */
+ 4663 "00001000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21 first
+ 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4665 "01101101" // /* MW 3 */
+ 4666 "00000000" // /* MW 2 */
+ 4667 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22 first
+ 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4669 "00000010" // /* MW 5 */
+ 4670 "11000011" // /* MW 4 */
+ 4671 "00110100" // /* MW 3 */
+ 4672 "00000011" // /* MW 2 */
+ 4673 "11000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4675 "00000001" // /* MW 5 */
+ 4676 "01000000" // /* MW 4 */
+ 4677 "01110000" // /* MW 3 */
+ 4678 "00001001" // /* MW 2 */
+ 4679 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4681 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4683 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55 first
+.delay_slot
+ 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4685 "01101101" // /* MW 3 */
+ 4686 "11001000" // /* MW 2 */
+ 4687 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+.delay_slot
+ 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4689 "00001000" // /* MW 3 */
+ 4690 "00000010" // /* MW 2 */
+ 4691 "00011000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 37 first
+.delay_slot
+ 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4693 "01111111" // /* MW 3 */
+ 4694 "01000001" // /* MW 2 */
+ 4695 "00011000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4 first
+ 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4697 "00010000" // /* MW 9 */
+ 4698 "01101000" // /* MW 8 */
+ 4699 "01111001" // /* MW 7 */
+ 4700 "00000100" // /* MW 6 */
+ 4701 "00000000" // /* MW 5 */
+ 4702 "00000000" // /* MW 4 */
+ 4703 "10000000" // /* MW 3 */
+ 4704 "00000011" // /* MW 2 */
+ 4705 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4
+ 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4707 "00010000" // /* MW 9 */
+ 4708 "01101000" // /* MW 8 */
+ 4709 "10111001" // /* MW 7 */
+ 4710 "00000101" // /* MW 6 */
+ 4711 "00000000" // /* MW 5 */
+ 4712 "00000000" // /* MW 4 */
+ 4713 "10000000" // /* MW 3 */
+ 4714 "00000010" // /* MW 2 */
+ 4715 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4717 "10100000" // /* MW 3 */
+ 4718 "01110000" // /* MW 2 */
+ 4719 "00011101" // /* MW 1 */
+ 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4721 "00000000" // /* MW 15 */
+ 4722 "00000000" // /* MW 14 */
+ 4723 "01111000" // /* MW 13 */
+ 4724 "10100101" // /* MW 12 */
+ 4725 "00000001" // /* MW 11 */
+ 4726 "00000000" // /* MW 10 */
+ 4727 "00000000" // /* MW 9 */
+ 4728 "00000000" // /* MW 8 */
+ 4729 "01011011" // /* MW 7 */
+ 4730 "00000001" // /* MW 6 */
+ 4731 "00100000" // /* MW 5 */
+ 4732 "00000000" // /* MW 4 */
+ 4733 "11110000" // /* MW 3 */
+ 4734 "00101100" // /* MW 2 */
+ 4735 "00000000" // /* MW 1 */
+ 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4737 "00000000" // /* MW 15 */
+ 4738 "00000000" // /* MW 14 */
+ 4739 "01111000" // /* MW 13 */
+ 4740 "10100101" // /* MW 12 */
+ 4741 "00000001" // /* MW 11 */
+ 4742 "00000000" // /* MW 10 */
+ 4743 "00000000" // /* MW 9 */
+ 4744 "00000000" // /* MW 8 */
+ 4745 "01011011" // /* MW 7 */
+ 4746 "00000001" // /* MW 6 */
+ 4747 "00100000" // /* MW 5 */
+ 4748 "00000000" // /* MW 4 */
+ 4749 "11110000" // /* MW 3 */
+ 4750 "00101100" // /* MW 2 */
+ 4751 "00000000" // /* MW 1 */
+ 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4753 "00000000" // /* MW 15 */
+ 4754 "00000000" // /* MW 14 */
+ 4755 "01111000" // /* MW 13 */
+ 4756 "10100101" // /* MW 12 */
+ 4757 "00000001" // /* MW 11 */
+ 4758 "00000000" // /* MW 10 */
+ 4759 "00000000" // /* MW 9 */
+ 4760 "00000000" // /* MW 8 */
+ 4761 "01011011" // /* MW 7 */
+ 4762 "00000001" // /* MW 6 */
+ 4763 "00100000" // /* MW 5 */
+ 4764 "00000000" // /* MW 4 */
+ 4765 "11110000" // /* MW 3 */
+ 4766 "00101100" // /* MW 2 */
+ 4767 "00000000" // /* MW 1 */
+ 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4769 "00000000" // /* MW 15 */
+ 4770 "00000000" // /* MW 14 */
+ 4771 "01111000" // /* MW 13 */
+ 4772 "10100101" // /* MW 12 */
+ 4773 "00000001" // /* MW 11 */
+ 4774 "00000000" // /* MW 10 */
+ 4775 "00000000" // /* MW 9 */
+ 4776 "00000000" // /* MW 8 */
+ 4777 "01011011" // /* MW 7 */
+ 4778 "00000001" // /* MW 6 */
+ 4779 "00100000" // /* MW 5 */
+ 4780 "00000000" // /* MW 4 */
+ 4781 "11110000" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+ 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4785 "00000000" // /* MW 15 */
+ 4786 "00000000" // /* MW 14 */
+ 4787 "01111000" // /* MW 13 */
+ 4788 "10100101" // /* MW 12 */
+ 4789 "00000001" // /* MW 11 */
+ 4790 "00000000" // /* MW 10 */
+ 4791 "00000000" // /* MW 9 */
+ 4792 "00000000" // /* MW 8 */
+ 4793 "01011011" // /* MW 7 */
+ 4794 "00000001" // /* MW 6 */
+ 4795 "00100000" // /* MW 5 */
+ 4796 "00000000" // /* MW 4 */
+ 4797 "11110000" // /* MW 3 */
+ 4798 "00101100" // /* MW 2 */
+ 4799 "00000000" // /* MW 1 */
+ 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4801 "00000000" // /* MW 15 */
+ 4802 "00000000" // /* MW 14 */
+ 4803 "01111000" // /* MW 13 */
+ 4804 "10100101" // /* MW 12 */
+ 4805 "00000001" // /* MW 11 */
+ 4806 "00000000" // /* MW 10 */
+ 4807 "00000000" // /* MW 9 */
+ 4808 "00000000" // /* MW 8 */
+ 4809 "01011011" // /* MW 7 */
+ 4810 "00000001" // /* MW 6 */
+ 4811 "00100000" // /* MW 5 */
+ 4812 "00000000" // /* MW 4 */
+ 4813 "11110000" // /* MW 3 */
+ 4814 "00101100" // /* MW 2 */
+ 4815 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4817 "00000000" // /* MW 15 */
+ 4818 "00000000" // /* MW 14 */
+ 4819 "01111000" // /* MW 13 */
+ 4820 "10100101" // /* MW 12 */
+ 4821 "00000001" // /* MW 11 */
+ 4822 "00000000" // /* MW 10 */
+ 4823 "00000000" // /* MW 9 */
+ 4824 "00000000" // /* MW 8 */
+ 4825 "00101110" // /* MW 7 */
+ 4826 "00010000" // /* MW 6 */
+ 4827 "00100010" // /* MW 5 */
+ 4828 "00000000" // /* MW 4 */
+ 4829 "11110000" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.src_ref 3 "pad_3d.h" 282 first
+.loop_nesting 0
+ 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4833 "00000000" // /* MW 3 */
+ 4834 "00101000" // /* MW 2 */
+ 4835 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4843 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+ 4845 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30
+.src_ref 2 "reduce_base_c8.h" 362 first
+.src_ref 2 "reduce_base_c8.h" 365 18
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+.function_start
+ 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4849 "11000000" // /* MW 3 */
+ 4850 "11010100" // /* MW 2 */
+ 4851 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 365 18 first
+ 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4853 "00000000" // /* MW 7 */
+ 4854 "11001011" // /* MW 6 */
+ 4855 "10110000" // /* MW 5 */
+ 4856 "00000011" // /* MW 4 */
+ 4857 "01100000" // /* MW 3 */
+ 4858 "10010001" // /* MW 2 */
+ 4859 "01101011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 19 first
+ 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4861 "00011010" // /* MW 3 */
+ 4862 "10001100" // /* MW 2 */
+ 4863 "00000111" // /* MW 1 */
+ 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4865 "00000000" // /* MW 1 */
+ 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4867 "00000000" // /* MW 1 */
+ 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4869 "00000000" // /* MW 1 */
+ 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4871 "00000000" // /* MW 1 */
+ 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4873 "00000000" // /* MW 1 */
+ 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4875 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 12
+.src_ref 2 "reduce_base_c8.h" 367 19
+ 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4877 "00000001" // /* MW 5 */
+ 4878 "01000000" // /* MW 4 */
+ 4879 "11110000" // /* MW 3 */
+ 4880 "00001001" // /* MW 2 */
+ 4881 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18
+.src_ref 5 "broadcast.hpp" 80 25
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 2 "reduce_base_c8.h" 372 34
+.delay_slot
+ 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4883 "00000001" // /* MW 3 */
+ 4884 "00100000" // /* MW 2 */
+ 4885 "00010000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.delay_slot
+ 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4887 "01110010" // /* MW 3 */
+ 4888 "11000010" // /* MW 2 */
+ 4889 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 362
+.delay_slot
+ 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4895 "00000001" // /* MW 5 */
+ 4896 "00000000" // /* MW 4 */
+ 4897 "00000000" // /* MW 3 */
+ 4898 "00100000" // /* MW 2 */
+ 4899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43
+ 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4901 "01001000" // /* MW 3 */
+ 4902 "10000000" // /* MW 2 */
+ 4903 "00011010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43 first
+ 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00110110" // /* MW 3 */
+ 4906 "01000000" // /* MW 2 */
+ 4907 "00000010" // /* MW 1 */
+ 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4909 "00000000" // /* MW 1 */
+ 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4911 "00000000" // /* MW 1 */
+ 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4913 "00000000" // /* MW 1 */
+ 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4915 "00000000" // /* MW 1 */
+ 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4917 "00000000" // /* MW 1 */
+ 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4919 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 34
+ 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4921 "00011001" // /* MW 3 */
+ 4922 "00000100" // /* MW 2 */
+ 4923 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4925 "00000001" // /* MW 5 */
+ 4926 "01000000" // /* MW 4 */
+ 4927 "11110000" // /* MW 3 */
+ 4928 "00001001" // /* MW 2 */
+ 4929 "00010000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 374 29
+.delay_slot
+ 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4931 "10010010" // /* MW 3 */
+ 4932 "00000010" // /* MW 2 */
+ 4933 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4941 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 372 12
+.src_ref 2 "reduce_base_c8.h" 374 29
+ 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4943 "00010000" // /* MW 11 */
+ 4944 "11101000" // /* MW 10 */
+ 4945 "01111001" // /* MW 9 */
+ 4946 "00000100" // /* MW 8 */
+ 4947 "00000000" // /* MW 7 */
+ 4948 "00000000" // /* MW 6 */
+ 4949 "10001011" // /* MW 5 */
+ 4950 "10000100" // /* MW 4 */
+ 4951 "11110011" // /* MW 3 */
+ 4952 "00101100" // /* MW 2 */
+ 4953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4955 "10100000" // /* MW 5 */
+ 4956 "11100111" // /* MW 4 */
+ 4957 "00010110" // /* MW 3 */
+ 4958 "00000000" // /* MW 2 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4961 "00000000" // /* MW 15 */
+ 4962 "00000000" // /* MW 14 */
+ 4963 "01111000" // /* MW 13 */
+ 4964 "01010000" // /* MW 12 */
+ 4965 "10111000" // /* MW 11 */
+ 4966 "00000010" // /* MW 10 */
+ 4967 "00000000" // /* MW 9 */
+ 4968 "00000000" // /* MW 8 */
+ 4969 "01011011" // /* MW 7 */
+ 4970 "00000001" // /* MW 6 */
+ 4971 "00100000" // /* MW 5 */
+ 4972 "00000000" // /* MW 4 */
+ 4973 "11110000" // /* MW 3 */
+ 4974 "00101100" // /* MW 2 */
+ 4975 "00000000" // /* MW 1 */
+ 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4977 "00000000" // /* MW 15 */
+ 4978 "00000000" // /* MW 14 */
+ 4979 "01111000" // /* MW 13 */
+ 4980 "10100101" // /* MW 12 */
+ 4981 "00000001" // /* MW 11 */
+ 4982 "00000000" // /* MW 10 */
+ 4983 "00000000" // /* MW 9 */
+ 4984 "00000000" // /* MW 8 */
+ 4985 "01011011" // /* MW 7 */
+ 4986 "00000001" // /* MW 6 */
+ 4987 "00100000" // /* MW 5 */
+ 4988 "00000000" // /* MW 4 */
+ 4989 "11110000" // /* MW 3 */
+ 4990 "00101100" // /* MW 2 */
+ 4991 "00000000" // /* MW 1 */
+ 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4993 "00000000" // /* MW 15 */
+ 4994 "00000000" // /* MW 14 */
+ 4995 "01111000" // /* MW 13 */
+ 4996 "10100101" // /* MW 12 */
+ 4997 "00000001" // /* MW 11 */
+ 4998 "00000000" // /* MW 10 */
+ 4999 "00000000" // /* MW 9 */
+ 5000 "00000000" // /* MW 8 */
+ 5001 "01011011" // /* MW 7 */
+ 5002 "00000001" // /* MW 6 */
+ 5003 "00100000" // /* MW 5 */
+ 5004 "00000000" // /* MW 4 */
+ 5005 "11110000" // /* MW 3 */
+ 5006 "00101100" // /* MW 2 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5009 "00000000" // /* MW 15 */
+ 5010 "00000000" // /* MW 14 */
+ 5011 "01111000" // /* MW 13 */
+ 5012 "10100101" // /* MW 12 */
+ 5013 "00000001" // /* MW 11 */
+ 5014 "00000000" // /* MW 10 */
+ 5015 "00000000" // /* MW 9 */
+ 5016 "00000000" // /* MW 8 */
+ 5017 "01011011" // /* MW 7 */
+ 5018 "00000001" // /* MW 6 */
+ 5019 "00100000" // /* MW 5 */
+ 5020 "00000000" // /* MW 4 */
+ 5021 "11110000" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+ 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5025 "00000000" // /* MW 15 */
+ 5026 "00000000" // /* MW 14 */
+ 5027 "01111000" // /* MW 13 */
+ 5028 "10100101" // /* MW 12 */
+ 5029 "00000001" // /* MW 11 */
+ 5030 "00000000" // /* MW 10 */
+ 5031 "00000000" // /* MW 9 */
+ 5032 "00000000" // /* MW 8 */
+ 5033 "01011011" // /* MW 7 */
+ 5034 "00000001" // /* MW 6 */
+ 5035 "00100000" // /* MW 5 */
+ 5036 "00000000" // /* MW 4 */
+ 5037 "11110000" // /* MW 3 */
+ 5038 "00101100" // /* MW 2 */
+ 5039 "00000000" // /* MW 1 */
+ 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5041 "00000000" // /* MW 15 */
+ 5042 "00000000" // /* MW 14 */
+ 5043 "01111000" // /* MW 13 */
+ 5044 "10100101" // /* MW 12 */
+ 5045 "00000001" // /* MW 11 */
+ 5046 "00000000" // /* MW 10 */
+ 5047 "00000000" // /* MW 9 */
+ 5048 "00000000" // /* MW 8 */
+ 5049 "01011011" // /* MW 7 */
+ 5050 "00000001" // /* MW 6 */
+ 5051 "00100000" // /* MW 5 */
+ 5052 "00000000" // /* MW 4 */
+ 5053 "11110000" // /* MW 3 */
+ 5054 "00101100" // /* MW 2 */
+ 5055 "00000000" // /* MW 1 */
+ 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5057 "00000000" // /* MW 15 */
+ 5058 "00000000" // /* MW 14 */
+ 5059 "01111000" // /* MW 13 */
+ 5060 "10100101" // /* MW 12 */
+ 5061 "00000001" // /* MW 11 */
+ 5062 "00000000" // /* MW 10 */
+ 5063 "00000000" // /* MW 9 */
+ 5064 "00000000" // /* MW 8 */
+ 5065 "01011011" // /* MW 7 */
+ 5066 "00000001" // /* MW 6 */
+ 5067 "00100000" // /* MW 5 */
+ 5068 "00000000" // /* MW 4 */
+ 5069 "11110000" // /* MW 3 */
+ 5070 "00101100" // /* MW 2 */
+ 5071 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 2 "reduce_base_c8.h" 374 29 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5073 "00000000" // /* MW 15 */
+ 5074 "00000000" // /* MW 14 */
+ 5075 "01111000" // /* MW 13 */
+ 5076 "10100101" // /* MW 12 */
+ 5077 "00000001" // /* MW 11 */
+ 5078 "00000000" // /* MW 10 */
+ 5079 "00000000" // /* MW 9 */
+ 5080 "10000000" // /* MW 8 */
+ 5081 "00000110" // /* MW 7 */
+ 5082 "00011101" // /* MW 6 */
+ 5083 "00100011" // /* MW 5 */
+ 5084 "00000000" // /* MW 4 */
+ 5085 "11110000" // /* MW 3 */
+ 5086 "00101100" // /* MW 2 */
+ 5087 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.loop_nesting 0
+ 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5089 "01000000" // /* MW 3 */
+ 5090 "00000000" // /* MW 2 */
+ 5091 "00011100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+ 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5093 "00111010" // /* MW 3 */
+ 5094 "10001010" // /* MW 2 */
+ 5095 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 388 28
+ 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5097 "00011001" // /* MW 5 */
+ 5098 "00011111" // /* MW 4 */
+ 5099 "01011010" // /* MW 3 */
+ 5100 "11011010" // /* MW 2 */
+ 5101 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+ 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5103 "10111001" // /* MW 5 */
+ 5104 "00000000" // /* MW 4 */
+ 5105 "01010001" // /* MW 3 */
+ 5106 "01101011" // /* MW 2 */
+ 5107 "11110101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5109 "10000001" // /* MW 5 */
+ 5110 "10111101" // /* MW 4 */
+ 5111 "01011001" // /* MW 3 */
+ 5112 "01010010" // /* MW 2 */
+ 5113 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 594 43 first
+ 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5115 "00111000" // /* MW 5 */
+ 5116 "11010011" // /* MW 4 */
+ 5117 "01010110" // /* MW 3 */
+ 5118 "01001110" // /* MW 2 */
+ 5119 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5121 "10110010" // /* MW 3 */
+ 5122 "11011110" // /* MW 2 */
+ 5123 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 64
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5125 "10011010" // /* MW 3 */
+ 5126 "11111111" // /* MW 2 */
+ 5127 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 56 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5129 "00010010" // /* MW 5 */
+ 5130 "00011100" // /* MW 4 */
+ 5131 "01010000" // /* MW 3 */
+ 5132 "11000110" // /* MW 2 */
+ 5133 "01100111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 596 56 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5135 "01011000" // /* MW 9 */
+ 5136 "00000000" // /* MW 8 */
+ 5137 "01100000" // /* MW 7 */
+ 5138 "11001010" // /* MW 6 */
+ 5139 "00100111" // /* MW 5 */
+ 5140 "00111111" // /* MW 4 */
+ 5141 "01010000" // /* MW 3 */
+ 5142 "11001010" // /* MW 2 */
+ 5143 "01111110" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5145 "01011000" // /* MW 11 */
+ 5146 "00000001" // /* MW 10 */
+ 5147 "11001000" // /* MW 9 */
+ 5148 "01101100" // /* MW 8 */
+ 5149 "00101001" // /* MW 7 */
+ 5150 "00100011" // /* MW 6 */
+ 5151 "01001011" // /* MW 5 */
+ 5152 "00010000" // /* MW 4 */
+ 5153 "01010010" // /* MW 3 */
+ 5154 "00011110" // /* MW 2 */
+ 5155 "11100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 75 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5157 "01011000" // /* MW 11 */
+ 5158 "00111100" // /* MW 10 */
+ 5159 "01001000" // /* MW 9 */
+ 5160 "11101100" // /* MW 8 */
+ 5161 "01110011" // /* MW 7 */
+ 5162 "00101100" // /* MW 6 */
+ 5163 "00001011" // /* MW 5 */
+ 5164 "01011010" // /* MW 4 */
+ 5165 "01010010" // /* MW 3 */
+ 5166 "11101111" // /* MW 2 */
+ 5167 "01100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5169 "01111000" // /* MW 11 */
+ 5170 "11010000" // /* MW 10 */
+ 5171 "00000001" // /* MW 9 */
+ 5172 "01101101" // /* MW 8 */
+ 5173 "01000011" // /* MW 7 */
+ 5174 "00101001" // /* MW 6 */
+ 5175 "10001011" // /* MW 5 */
+ 5176 "10000100" // /* MW 4 */
+ 5177 "10000011" // /* MW 3 */
+ 5178 "00001010" // /* MW 2 */
+ 5179 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5181 "01111000" // /* MW 9 */
+ 5182 "00010000" // /* MW 8 */
+ 5183 "10000101" // /* MW 7 */
+ 5184 "01101110" // /* MW 6 */
+ 5185 "00110011" // /* MW 5 */
+ 5186 "00100111" // /* MW 4 */
+ 5187 "10110000" // /* MW 3 */
+ 5188 "00010010" // /* MW 2 */
+ 5189 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5191 "01111000" // /* MW 9 */
+ 5192 "11010000" // /* MW 8 */
+ 5193 "00000100" // /* MW 7 */
+ 5194 "01101111" // /* MW 6 */
+ 5195 "00110011" // /* MW 5 */
+ 5196 "00101011" // /* MW 4 */
+ 5197 "00110000" // /* MW 3 */
+ 5198 "01000001" // /* MW 2 */
+ 5199 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5201 "11111110" // /* MW 5 */
+ 5202 "11110010" // /* MW 4 */
+ 5203 "10111010" // /* MW 3 */
+ 5204 "01001101" // /* MW 2 */
+ 5205 "10001100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5207 "01000001" // /* MW 5 */
+ 5208 "00010001" // /* MW 4 */
+ 5209 "10110001" // /* MW 3 */
+ 5210 "01001101" // /* MW 2 */
+ 5211 "10010100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5213 "01111000" // /* MW 11 */
+ 5214 "11010000" // /* MW 10 */
+ 5215 "00000100" // /* MW 9 */
+ 5216 "01101100" // /* MW 8 */
+ 5217 "01100011" // /* MW 7 */
+ 5218 "00001110" // /* MW 6 */
+ 5219 "01001011" // /* MW 5 */
+ 5220 "00010000" // /* MW 4 */
+ 5221 "00110000" // /* MW 3 */
+ 5222 "00000001" // /* MW 2 */
+ 5223 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+ 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5225 "00111101" // /* MW 9 */
+ 5226 "00110000" // /* MW 8 */
+ 5227 "00010100" // /* MW 7 */
+ 5228 "11100100" // /* MW 6 */
+ 5229 "00100000" // /* MW 5 */
+ 5230 "00000011" // /* MW 4 */
+ 5231 "01100111" // /* MW 3 */
+ 5232 "10000001" // /* MW 2 */
+ 5233 "00001011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5235 "01110010" // /* MW 9 */
+ 5236 "01010000" // /* MW 8 */
+ 5237 "01000100" // /* MW 7 */
+ 5238 "00000010" // /* MW 6 */
+ 5239 "00001011" // /* MW 5 */
+ 5240 "01011011" // /* MW 4 */
+ 5241 "00110100" // /* MW 3 */
+ 5242 "00100001" // /* MW 2 */
+ 5243 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5245 "00000001" // /* MW 5 */
+ 5246 "10010011" // /* MW 4 */
+ 5247 "00110011" // /* MW 3 */
+ 5248 "00110001" // /* MW 2 */
+ 5249 "00000011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5251 "00111101" // /* MW 7 */
+ 5252 "10000000" // /* MW 6 */
+ 5253 "00010001" // /* MW 5 */
+ 5254 "00000100" // /* MW 4 */
+ 5255 "00110000" // /* MW 3 */
+ 5256 "01000001" // /* MW 2 */
+ 5257 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5259 "10010101" // /* MW 3 */
+ 5260 "01010000" // /* MW 2 */
+ 5261 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5265 "00111101" // /* MW 9 */
+ 5266 "00101000" // /* MW 8 */
+ 5267 "00010000" // /* MW 7 */
+ 5268 "00000010" // /* MW 6 */
+ 5269 "01001100" // /* MW 5 */
+ 5270 "10001111" // /* MW 4 */
+ 5271 "00000000" // /* MW 3 */
+ 5272 "00000000" // /* MW 2 */
+ 5273 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5275 "00000001" // /* MW 5 */
+ 5276 "00010000" // /* MW 4 */
+ 5277 "00110111" // /* MW 3 */
+ 5278 "00000001" // /* MW 2 */
+ 5279 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5281 "10000001" // /* MW 15 */
+ 5282 "10100001" // /* MW 14 */
+ 5283 "01111000" // /* MW 13 */
+ 5284 "00000000" // /* MW 12 */
+ 5285 "10000010" // /* MW 11 */
+ 5286 "00001000" // /* MW 10 */
+ 5287 "01000100" // /* MW 9 */
+ 5288 "00000000" // /* MW 8 */
+ 5289 "10001011" // /* MW 7 */
+ 5290 "10000100" // /* MW 6 */
+ 5291 "00100100" // /* MW 5 */
+ 5292 "00000000" // /* MW 4 */
+ 5293 "10000000" // /* MW 3 */
+ 5294 "00000110" // /* MW 2 */
+ 5295 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5297 "01100001" // /* MW 15 */
+ 5298 "10010000" // /* MW 14 */
+ 5299 "00010000" // /* MW 13 */
+ 5300 "10010000" // /* MW 12 */
+ 5301 "10111010" // /* MW 11 */
+ 5302 "00000101" // /* MW 10 */
+ 5303 "00000000" // /* MW 9 */
+ 5304 "00000000" // /* MW 8 */
+ 5305 "00001011" // /* MW 7 */
+ 5306 "01011010" // /* MW 6 */
+ 5307 "00100001" // /* MW 5 */
+ 5308 "00000000" // /* MW 4 */
+ 5309 "00110000" // /* MW 3 */
+ 5310 "00100001" // /* MW 2 */
+ 5311 "00011101" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5313 "10001001" // /* MW 3 */
+ 5314 "00011001" // /* MW 2 */
+ 5315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5317 "00111101" // /* MW 11 */
+ 5318 "10000000" // /* MW 10 */
+ 5319 "00010001" // /* MW 9 */
+ 5320 "10001110" // /* MW 8 */
+ 5321 "10101101" // /* MW 7 */
+ 5322 "00000000" // /* MW 6 */
+ 5323 "00100000" // /* MW 5 */
+ 5324 "00000000" // /* MW 4 */
+ 5325 "10110000" // /* MW 3 */
+ 5326 "00010010" // /* MW 2 */
+ 5327 "01101010" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5329 "00000000" // /* MW 15 */
+ 5330 "00000000" // /* MW 14 */
+ 5331 "01111000" // /* MW 13 */
+ 5332 "10100101" // /* MW 12 */
+ 5333 "00000001" // /* MW 11 */
+ 5334 "00000000" // /* MW 10 */
+ 5335 "00000000" // /* MW 9 */
+ 5336 "00000000" // /* MW 8 */
+ 5337 "01011011" // /* MW 7 */
+ 5338 "00000001" // /* MW 6 */
+ 5339 "00100000" // /* MW 5 */
+ 5340 "00000000" // /* MW 4 */
+ 5341 "00110000" // /* MW 3 */
+ 5342 "01000001" // /* MW 2 */
+ 5343 "00010101" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5345 "00000000" // /* MW 15 */
+ 5346 "00000000" // /* MW 14 */
+ 5347 "01111000" // /* MW 13 */
+ 5348 "10100101" // /* MW 12 */
+ 5349 "00000001" // /* MW 11 */
+ 5350 "00000000" // /* MW 10 */
+ 5351 "00000000" // /* MW 9 */
+ 5352 "00000000" // /* MW 8 */
+ 5353 "01011011" // /* MW 7 */
+ 5354 "00000001" // /* MW 6 */
+ 5355 "00100000" // /* MW 5 */
+ 5356 "00000000" // /* MW 4 */
+ 5357 "11110000" // /* MW 3 */
+ 5358 "00101100" // /* MW 2 */
+ 5359 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5361 "01000001" // /* MW 15 */
+ 5362 "10000001" // /* MW 14 */
+ 5363 "01111000" // /* MW 13 */
+ 5364 "10100101" // /* MW 12 */
+ 5365 "00000001" // /* MW 11 */
+ 5366 "00000000" // /* MW 10 */
+ 5367 "00000000" // /* MW 9 */
+ 5368 "00000000" // /* MW 8 */
+ 5369 "01011011" // /* MW 7 */
+ 5370 "00000001" // /* MW 6 */
+ 5371 "00100000" // /* MW 5 */
+ 5372 "00000000" // /* MW 4 */
+ 5373 "11110000" // /* MW 3 */
+ 5374 "00101100" // /* MW 2 */
+ 5375 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5377 "00000000" // /* MW 15 */
+ 5378 "00000000" // /* MW 14 */
+ 5379 "01111000" // /* MW 13 */
+ 5380 "10100101" // /* MW 12 */
+ 5381 "00000001" // /* MW 11 */
+ 5382 "00000000" // /* MW 10 */
+ 5383 "00000000" // /* MW 9 */
+ 5384 "10000000" // /* MW 8 */
+ 5385 "00000110" // /* MW 7 */
+ 5386 "00110001" // /* MW 6 */
+ 5387 "00100100" // /* MW 5 */
+ 5388 "00000000" // /* MW 4 */
+ 5389 "00110000" // /* MW 3 */
+ 5390 "00000001" // /* MW 2 */
+ 5391 "00011001" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5393 "10000001" // /* MW 15 */
+ 5394 "10100001" // /* MW 14 */
+ 5395 "01111000" // /* MW 13 */
+ 5396 "10100101" // /* MW 12 */
+ 5397 "00000001" // /* MW 11 */
+ 5398 "00000000" // /* MW 10 */
+ 5399 "00000000" // /* MW 9 */
+ 5400 "00000000" // /* MW 8 */
+ 5401 "01011011" // /* MW 7 */
+ 5402 "00000001" // /* MW 6 */
+ 5403 "00100000" // /* MW 5 */
+ 5404 "00000000" // /* MW 4 */
+ 5405 "11110000" // /* MW 3 */
+ 5406 "00101100" // /* MW 2 */
+ 5407 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5409 "01100001" // /* MW 15 */
+ 5410 "10010000" // /* MW 14 */
+ 5411 "01111000" // /* MW 13 */
+ 5412 "10100101" // /* MW 12 */
+ 5413 "00000001" // /* MW 11 */
+ 5414 "00000000" // /* MW 10 */
+ 5415 "00000000" // /* MW 9 */
+ 5416 "00000000" // /* MW 8 */
+ 5417 "01011011" // /* MW 7 */
+ 5418 "00000001" // /* MW 6 */
+ 5419 "00100000" // /* MW 5 */
+ 5420 "00000000" // /* MW 4 */
+ 5421 "00110000" // /* MW 3 */
+ 5422 "00100001" // /* MW 2 */
+ 5423 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 107 23
+.src_ref 2 "reduce_base_c8.h" 412 41 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5425 "00010000" // /* MW 9 */
+ 5426 "11000000" // /* MW 8 */
+ 5427 "10101111" // /* MW 7 */
+ 5428 "00001100" // /* MW 6 */
+ 5429 "00000000" // /* MW 5 */
+ 5430 "00000000" // /* MW 4 */
+ 5431 "01010000" // /* MW 3 */
+ 5432 "00000111" // /* MW 2 */
+ 5433 "11101100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 6 "me_vmult_float_emulated.h" 107 23 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5435 "00111101" // /* MW 9 */
+ 5436 "10000000" // /* MW 8 */
+ 5437 "00010001" // /* MW 7 */
+ 5438 "11100010" // /* MW 6 */
+ 5439 "01110010" // /* MW 5 */
+ 5440 "00010101" // /* MW 4 */
+ 5441 "00110010" // /* MW 3 */
+ 5442 "00110001" // /* MW 2 */
+ 5443 "00000011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5445 "01110010" // /* MW 3 */
+ 5446 "01000001" // /* MW 2 */
+ 5447 "00011000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5449 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5451 "00111101" // /* MW 3 */
+ 5452 "00101000" // /* MW 2 */
+ 5453 "00010000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5455 "00000110" // /* MW 3 */
+ 5456 "00110001" // /* MW 2 */
+ 5457 "00001100" // /* MW 1 */
+ 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5459 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 412 52 first
+ 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5461 "00111101" // /* MW 7 */
+ 5462 "00001100" // /* MW 6 */
+ 5463 "00010010" // /* MW 5 */
+ 5464 "11111001" // /* MW 4 */
+ 5465 "01011111" // /* MW 3 */
+ 5466 "00000010" // /* MW 2 */
+ 5467 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 31
+ 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5469 "00001000" // /* MW 3 */
+ 5470 "01000000" // /* MW 2 */
+ 5471 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 16
+ 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */
+ 5473 "00000001" // /* MW 5 */
+ 5474 "01000000" // /* MW 4 */
+ 5475 "01110000" // /* MW 3 */
+ 5476 "00001100" // /* MW 2 */
+ 5477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5483 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.delay_slot
+ 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5485 "00000110" // /* MW 3 */
+ 5486 "00110001" // /* MW 2 */
+ 5487 "00001100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5489 "00000000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5491 "00011010" // /* MW 5 */
+ 5492 "00010100" // /* MW 4 */
+ 5493 "11010000" // /* MW 3 */
+ 5494 "10011010" // /* MW 2 */
+ 5495 "01000110" // /* MW 1 */
+ 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5497 "00000000" // /* MW 1 */
+ 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5499 "00000000" // /* MW 1 */
+ 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5501 "00000000" // /* MW 1 */
+ 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5503 "00000000" // /* MW 1 */
+ 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5505 "00000000" // /* MW 1 */
+ 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5507 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5509 "01101001" // /* MW 3 */
+ 5510 "01001110" // /* MW 2 */
+ 5511 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */
+ 5513 "00000001" // /* MW 5 */
+ 5514 "01000000" // /* MW 4 */
+ 5515 "01000000" // /* MW 3 */
+ 5516 "00001110" // /* MW 2 */
+ 5517 "00111000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5519 "00010001" // /* MW 3 */
+ 5520 "00000000" // /* MW 2 */
+ 5521 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5529 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5531 "00001000" // /* MW 3 */
+ 5532 "10001010" // /* MW 2 */
+ 5533 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */
+ 5535 "00000001" // /* MW 5 */
+ 5536 "01000000" // /* MW 4 */
+ 5537 "10111000" // /* MW 3 */
+ 5538 "00001100" // /* MW 2 */
+ 5539 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5549 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5551 "11000001" // /* MW 5 */
+ 5552 "10000011" // /* MW 4 */
+ 5553 "10101001" // /* MW 3 */
+ 5554 "01000000" // /* MW 2 */
+ 5555 "00100100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5557 "11111110" // /* MW 5 */
+ 5558 "10111111" // /* MW 4 */
+ 5559 "11111010" // /* MW 3 */
+ 5560 "00000000" // /* MW 2 */
+ 5561 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+ 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5563 "00000010" // /* MW 5 */
+ 5564 "01010000" // /* MW 4 */
+ 5565 "11110000" // /* MW 3 */
+ 5566 "00101100" // /* MW 2 */
+ 5567 "00000000" // /* MW 1 */
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5569 "01011000" // /* MW 11 */
+ 5570 "00111100" // /* MW 10 */
+ 5571 "01001000" // /* MW 9 */
+ 5572 "00001000" // /* MW 8 */
+ 5573 "01010010" // /* MW 7 */
+ 5574 "00000000" // /* MW 6 */
+ 5575 "00001011" // /* MW 5 */
+ 5576 "10000011" // /* MW 4 */
+ 5577 "10000010" // /* MW 3 */
+ 5578 "00001010" // /* MW 2 */
+ 5579 "00001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+ 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5581 "00000010" // /* MW 5 */
+ 5582 "00010001" // /* MW 4 */
+ 5583 "01010000" // /* MW 3 */
+ 5584 "00011010" // /* MW 2 */
+ 5585 "01001000" // /* MW 1 */
+ 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5587 "00000000" // /* MW 1 */
+ 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5589 "00000000" // /* MW 1 */
+ 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5591 "00000000" // /* MW 1 */
+ 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5593 "00000000" // /* MW 1 */
+ 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5595 "00000000" // /* MW 1 */
+ 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5597 "01100111" // /* MW 3 */
+ 5598 "00000001" // /* MW 2 */
+ 5599 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+ 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5601 "00000000" // /* MW 15 */
+ 5602 "00000000" // /* MW 14 */
+ 5603 "01111000" // /* MW 13 */
+ 5604 "10100101" // /* MW 12 */
+ 5605 "00000001" // /* MW 11 */
+ 5606 "11110100" // /* MW 10 */
+ 5607 "01010010" // /* MW 9 */
+ 5608 "00001100" // /* MW 8 */
+ 5609 "01011011" // /* MW 7 */
+ 5610 "00000001" // /* MW 6 */
+ 5611 "00100000" // /* MW 5 */
+ 5612 "00000000" // /* MW 4 */
+ 5613 "11110000" // /* MW 3 */
+ 5614 "00101100" // /* MW 2 */
+ 5615 "00000000" // /* MW 1 */
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5617 "00010000" // /* MW 11 */
+ 5618 "01111000" // /* MW 10 */
+ 5619 "10110010" // /* MW 9 */
+ 5620 "11110011" // /* MW 8 */
+ 5621 "00000001" // /* MW 7 */
+ 5622 "10000000" // /* MW 6 */
+ 5623 "10100101" // /* MW 5 */
+ 5624 "11111101" // /* MW 4 */
+ 5625 "10000111" // /* MW 3 */
+ 5626 "10001010" // /* MW 2 */
+ 5627 "00000100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16
+ 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5629 "01111000" // /* MW 11 */
+ 5630 "00111001" // /* MW 10 */
+ 5631 "10001011" // /* MW 9 */
+ 5632 "00001000" // /* MW 8 */
+ 5633 "01010000" // /* MW 7 */
+ 5634 "10000000" // /* MW 6 */
+ 5635 "01100101" // /* MW 5 */
+ 5636 "11111010" // /* MW 4 */
+ 5637 "01010111" // /* MW 3 */
+ 5638 "11011100" // /* MW 2 */
+ 5639 "11100000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1289 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+ 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5641 "01111000" // /* MW 11 */
+ 5642 "01001001" // /* MW 10 */
+ 5643 "00000010" // /* MW 9 */
+ 5644 "11101000" // /* MW 8 */
+ 5645 "01100111" // /* MW 7 */
+ 5646 "00111111" // /* MW 6 */
+ 5647 "10001011" // /* MW 5 */
+ 5648 "10000100" // /* MW 4 */
+ 5649 "11010111" // /* MW 3 */
+ 5650 "00011010" // /* MW 2 */
+ 5651 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 1280 49
+ 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5653 "01111000" // /* MW 9 */
+ 5654 "01001001" // /* MW 8 */
+ 5655 "00000010" // /* MW 7 */
+ 5656 "00000001" // /* MW 6 */
+ 5657 "11010010" // /* MW 5 */
+ 5658 "00000010" // /* MW 4 */
+ 5659 "00000000" // /* MW 3 */
+ 5660 "11111000" // /* MW 2 */
+ 5661 "00000011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first
+ 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5663 "00010000" // /* MW 9 */
+ 5664 "01000000" // /* MW 8 */
+ 5665 "01111011" // /* MW 7 */
+ 5666 "00000100" // /* MW 6 */
+ 5667 "00000000" // /* MW 5 */
+ 5668 "00000000" // /* MW 4 */
+ 5669 "00000000" // /* MW 3 */
+ 5670 "00011001" // /* MW 2 */
+ 5671 "00000010" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+ 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00010000" // /* MW 9 */
+ 5674 "01100000" // /* MW 8 */
+ 5675 "10111100" // /* MW 7 */
+ 5676 "00000101" // /* MW 6 */
+ 5677 "00000000" // /* MW 5 */
+ 5678 "00000000" // /* MW 4 */
+ 5679 "10110000" // /* MW 3 */
+ 5680 "10010100" // /* MW 2 */
+ 5681 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98
+ 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5683 "00000001" // /* MW 3 */
+ 5684 "01110100" // /* MW 2 */
+ 5685 "00010000" // /* MW 1 */
+ 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5687 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1286 72
+.src_ref 7 "accum.hpp" 1108 103
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5689 "10000000" // /* MW 3 */
+ 5690 "11111010" // /* MW 2 */
+ 5691 "00010101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5693 "00000000" // /* MW 7 */
+ 5694 "10000000" // /* MW 6 */
+ 5695 "10111001" // /* MW 5 */
+ 5696 "00000010" // /* MW 4 */
+ 5697 "11000000" // /* MW 3 */
+ 5698 "00000010" // /* MW 2 */
+ 5699 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5701 "10010010" // /* MW 3 */
+ 5702 "10100000" // /* MW 2 */
+ 5703 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5705 "10000011" // /* MW 7 */
+ 5706 "01000000" // /* MW 6 */
+ 5707 "00010000" // /* MW 5 */
+ 5708 "11100110" // /* MW 4 */
+ 5709 "10010010" // /* MW 3 */
+ 5710 "10100110" // /* MW 2 */
+ 5711 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5713 "10010010" // /* MW 3 */
+ 5714 "00101010" // /* MW 2 */
+ 5715 "00011011" // /* MW 1 */
+ 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5717 "00000000" // /* MW 1 */
+ 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5719 "00000000" // /* MW 1 */
+ 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5721 "00000000" // /* MW 1 */
+ 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5723 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5725 "00010110" // /* MW 3 */
+ 5726 "11000000" // /* MW 2 */
+ 5727 "00001001" // /* MW 1 */
+ 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5729 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5731 "10000011" // /* MW 3 */
+ 5732 "00000110" // /* MW 2 */
+ 5733 "00010000" // /* MW 1 */
+ 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5735 "00000000" // /* MW 1 */
+ 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5737 "00000000" // /* MW 1 */
+ 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5739 "00000000" // /* MW 1 */
+ 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5741 "00000000" // /* MW 1 */
+ 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5743 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+ 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00000000" // /* MW 15 */
+ 5746 "00000000" // /* MW 14 */
+ 5747 "01111000" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00001000" // /* MW 10 */
+ 5751 "01110001" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "00010110" // /* MW 7 */
+ 5754 "11000000" // /* MW 6 */
+ 5755 "00100010" // /* MW 5 */
+ 5756 "00000000" // /* MW 4 */
+ 5757 "11110000" // /* MW 3 */
+ 5758 "00101100" // /* MW 2 */
+ 5759 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first
+.begin_of_loop
+.loop_nesting 1
+ 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5761 "00100101" // /* MW 5 */
+ 5762 "10100101" // /* MW 4 */
+ 5763 "10001001" // /* MW 3 */
+ 5764 "10111110" // /* MW 2 */
+ 5765 "00100011" // /* MW 1 */
+ 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5767 "10010010" // /* MW 3 */
+ 5768 "11010110" // /* MW 2 */
+ 5769 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49
+ 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5771 "11000000" // /* MW 3 */
+ 5772 "00011110" // /* MW 2 */
+ 5773 "00011111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49 first
+ 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5775 "10000100" // /* MW 3 */
+ 5776 "00111011" // /* MW 2 */
+ 5777 "00010111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1285 72 first
+ 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5779 "11100000" // /* MW 5 */
+ 5780 "00111101" // /* MW 4 */
+ 5781 "01011110" // /* MW 3 */
+ 5782 "11001001" // /* MW 2 */
+ 5783 "11101110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+ 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5785 "11011101" // /* MW 3 */
+ 5786 "10111101" // /* MW 2 */
+ 5787 "00010101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98 first
+ 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5789 "11010001" // /* MW 3 */
+ 5790 "10111111" // /* MW 2 */
+ 5791 "00010110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "add_reduce.hpp" 322 47 first
+ 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5793 "11001101" // /* MW 5 */
+ 5794 "01110000" // /* MW 4 */
+ 5795 "01001000" // /* MW 3 */
+ 5796 "10111100" // /* MW 2 */
+ 5797 "00101111" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+ 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5799 "10010010" // /* MW 3 */
+ 5800 "00010000" // /* MW 2 */
+ 5801 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 7 "accum.hpp" 198 120
+ 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5803 "00100010" // /* MW 3 */
+ 5804 "01001110" // /* MW 2 */
+ 5805 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 7 "accum.hpp" 198 120 first
+ 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5807 "00100010" // /* MW 3 */
+ 5808 "01001111" // /* MW 2 */
+ 5809 "00011101" // /* MW 1 */
+ 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5811 "10010010" // /* MW 3 */
+ 5812 "10010000" // /* MW 2 */
+ 5813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5815 "10010010" // /* MW 3 */
+ 5816 "10010100" // /* MW 2 */
+ 5817 "00011011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5819 "00111101" // /* MW 7 */
+ 5820 "00101000" // /* MW 6 */
+ 5821 "00010011" // /* MW 5 */
+ 5822 "11100110" // /* MW 4 */
+ 5823 "10001010" // /* MW 3 */
+ 5824 "00010010" // /* MW 2 */
+ 5825 "00000010" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5827 "10001010" // /* MW 3 */
+ 5828 "00001110" // /* MW 2 */
+ 5829 "00011001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5831 "00111101" // /* MW 7 */
+ 5832 "01010000" // /* MW 6 */
+ 5833 "00010010" // /* MW 5 */
+ 5834 "11100110" // /* MW 4 */
+ 5835 "00100010" // /* MW 3 */
+ 5836 "01001110" // /* MW 2 */
+ 5837 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5839 "10010010" // /* MW 3 */
+ 5840 "00001110" // /* MW 2 */
+ 5841 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5843 "01100110" // /* MW 3 */
+ 5844 "11000000" // /* MW 2 */
+ 5845 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5847 "00111101" // /* MW 7 */
+ 5848 "00110000" // /* MW 6 */
+ 5849 "00010100" // /* MW 5 */
+ 5850 "11100110" // /* MW 4 */
+ 5851 "10010010" // /* MW 3 */
+ 5852 "00010000" // /* MW 2 */
+ 5853 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5855 "10010010" // /* MW 3 */
+ 5856 "00010010" // /* MW 2 */
+ 5857 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 151 136 first
+ 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5859 "00010010" // /* MW 3 */
+ 5860 "00101100" // /* MW 2 */
+ 5861 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 7 "accum.hpp" 151 115
+ 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5863 "00100010" // /* MW 3 */
+ 5864 "11010001" // /* MW 2 */
+ 5865 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5867 "01100110" // /* MW 3 */
+ 5868 "01001000" // /* MW 2 */
+ 5869 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5871 "00111101" // /* MW 7 */
+ 5872 "01100100" // /* MW 6 */
+ 5873 "00010001" // /* MW 5 */
+ 5874 "11100110" // /* MW 4 */
+ 5875 "10010010" // /* MW 3 */
+ 5876 "00010000" // /* MW 2 */
+ 5877 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5879 "10010010" // /* MW 3 */
+ 5880 "00010010" // /* MW 2 */
+ 5881 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22
+ 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5883 "00010010" // /* MW 3 */
+ 5884 "00101000" // /* MW 2 */
+ 5885 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5887 "00011110" // /* MW 3 */
+ 5888 "01000000" // /* MW 2 */
+ 5889 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5891 "00111101" // /* MW 7 */
+ 5892 "01001100" // /* MW 6 */
+ 5893 "00010010" // /* MW 5 */
+ 5894 "11100110" // /* MW 4 */
+ 5895 "00010010" // /* MW 3 */
+ 5896 "00110000" // /* MW 2 */
+ 5897 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5899 "10010010" // /* MW 3 */
+ 5900 "00010100" // /* MW 2 */
+ 5901 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5903 "00111101" // /* MW 7 */
+ 5904 "10001100" // /* MW 6 */
+ 5905 "00010011" // /* MW 5 */
+ 5906 "11000110" // /* MW 4 */
+ 5907 "00011110" // /* MW 3 */
+ 5908 "01000000" // /* MW 2 */
+ 5909 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5911 "10010010" // /* MW 3 */
+ 5912 "00010000" // /* MW 2 */
+ 5913 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5915 "00010010" // /* MW 3 */
+ 5916 "00100100" // /* MW 2 */
+ 5917 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5919 "00111101" // /* MW 7 */
+ 5920 "00110000" // /* MW 6 */
+ 5921 "00010001" // /* MW 5 */
+ 5922 "11000110" // /* MW 4 */
+ 5923 "00011110" // /* MW 3 */
+ 5924 "01000000" // /* MW 2 */
+ 5925 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5927 "10010010" // /* MW 3 */
+ 5928 "00010000" // /* MW 2 */
+ 5929 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5931 "00010010" // /* MW 3 */
+ 5932 "00101000" // /* MW 2 */
+ 5933 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5935 "00111101" // /* MW 7 */
+ 5936 "01010000" // /* MW 6 */
+ 5937 "00010010" // /* MW 5 */
+ 5938 "11000110" // /* MW 4 */
+ 5939 "00000010" // /* MW 3 */
+ 5940 "01000000" // /* MW 2 */
+ 5941 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5943 "10010010" // /* MW 3 */
+ 5944 "00010000" // /* MW 2 */
+ 5945 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5947 "00010010" // /* MW 3 */
+ 5948 "00101100" // /* MW 2 */
+ 5949 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5951 "00000010" // /* MW 3 */
+ 5952 "01000000" // /* MW 2 */
+ 5953 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5955 "00111101" // /* MW 7 */
+ 5956 "01110000" // /* MW 6 */
+ 5957 "00010011" // /* MW 5 */
+ 5958 "11100110" // /* MW 4 */
+ 5959 "00010010" // /* MW 3 */
+ 5960 "00100100" // /* MW 2 */
+ 5961 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5963 "10010010" // /* MW 3 */
+ 5964 "00010000" // /* MW 2 */
+ 5965 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5967 "00111101" // /* MW 7 */
+ 5968 "00110000" // /* MW 6 */
+ 5969 "00010000" // /* MW 5 */
+ 5970 "11000110" // /* MW 4 */
+ 5971 "00000010" // /* MW 3 */
+ 5972 "01010000" // /* MW 2 */
+ 5973 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5975 "10010010" // /* MW 3 */
+ 5976 "00010100" // /* MW 2 */
+ 5977 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5979 "00010010" // /* MW 3 */
+ 5980 "00101000" // /* MW 2 */
+ 5981 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5983 "00000001" // /* MW 3 */
+ 5984 "11100010" // /* MW 2 */
+ 5985 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5987 "00010010" // /* MW 3 */
+ 5988 "00101100" // /* MW 2 */
+ 5989 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1288 16 first
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5991 "00000011" // /* MW 5 */
+ 5992 "01010100" // /* MW 4 */
+ 5993 "10000011" // /* MW 3 */
+ 5994 "11010000" // /* MW 2 */
+ 5995 "11100010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5997 "00010010" // /* MW 3 */
+ 5998 "10100000" // /* MW 2 */
+ 5999 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1287 41 first
+.src_ref 5 "broadcast.hpp" 80 25 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6001 "00000110" // /* MW 5 */
+ 6002 "10110100" // /* MW 4 */
+ 6003 "10001010" // /* MW 3 */
+ 6004 "11010100" // /* MW 2 */
+ 6005 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6007 "10100000" // /* MW 3 */
+ 6008 "11010100" // /* MW 2 */
+ 6009 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6011 "11010001" // /* MW 3 */
+ 6012 "00010000" // /* MW 2 */
+ 6013 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6015 "11110001" // /* MW 3 */
+ 6016 "00010010" // /* MW 2 */
+ 6017 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 1413 19 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "00100010" // /* MW 3 */
+ 6020 "11010011" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "00100010" // /* MW 3 */
+ 6024 "10010011" // /* MW 2 */
+ 6025 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6027 "00100010" // /* MW 3 */
+ 6028 "00010101" // /* MW 2 */
+ 6029 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6031 "00000000" // /* MW 3 */
+ 6032 "01011100" // /* MW 2 */
+ 6033 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6035 "00001000" // /* MW 3 */
+ 6036 "00001100" // /* MW 2 */
+ 6037 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6039 "10101000" // /* MW 3 */
+ 6040 "11000011" // /* MW 2 */
+ 6041 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6043 "10010010" // /* MW 3 */
+ 6044 "00001110" // /* MW 2 */
+ 6045 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6047 "10010010" // /* MW 3 */
+ 6048 "10101100" // /* MW 2 */
+ 6049 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6051 "01110000" // /* MW 7 */
+ 6052 "01001001" // /* MW 6 */
+ 6053 "00000111" // /* MW 5 */
+ 6054 "00000001" // /* MW 4 */
+ 6055 "11000000" // /* MW 3 */
+ 6056 "00000010" // /* MW 2 */
+ 6057 "01101000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6059 "10010010" // /* MW 3 */
+ 6060 "00110010" // /* MW 2 */
+ 6061 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+ 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6063 "10000011" // /* MW 9 */
+ 6064 "01001100" // /* MW 8 */
+ 6065 "00010010" // /* MW 7 */
+ 6066 "00001111" // /* MW 6 */
+ 6067 "11101010" // /* MW 5 */
+ 6068 "11101101" // /* MW 4 */
+ 6069 "11001101" // /* MW 3 */
+ 6070 "10111011" // /* MW 2 */
+ 6071 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "10100001" // /* MW 7 */
+ 6074 "11101100" // /* MW 6 */
+ 6075 "00010001" // /* MW 5 */
+ 6076 "10010001" // /* MW 4 */
+ 6077 "00111110" // /* MW 3 */
+ 6078 "00001011" // /* MW 2 */
+ 6079 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6081 "01100001" // /* MW 9 */
+ 6082 "11101100" // /* MW 8 */
+ 6083 "00010000" // /* MW 7 */
+ 6084 "00101111" // /* MW 6 */
+ 6085 "00001001" // /* MW 5 */
+ 6086 "00110011" // /* MW 4 */
+ 6087 "11100010" // /* MW 3 */
+ 6088 "10100101" // /* MW 2 */
+ 6089 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6091 "00000001" // /* MW 3 */
+ 6092 "11101100" // /* MW 2 */
+ 6093 "00010011" // /* MW 1 */
+ 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6095 "00000000" // /* MW 1 */
+ 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6097 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6099 "00010110" // /* MW 3 */
+ 6100 "11000001" // /* MW 2 */
+ 6101 "00001100" // /* MW 1 */
+ 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6103 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "10000011" // /* MW 3 */
+ 6106 "01010010" // /* MW 2 */
+ 6107 "00010010" // /* MW 1 */
+ 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6109 "00000000" // /* MW 1 */
+ 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6111 "00000000" // /* MW 1 */
+ 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6113 "00000000" // /* MW 1 */
+ 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6115 "00000000" // /* MW 1 */
+ 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6119 "00010110" // /* MW 3 */
+ 6120 "01000001" // /* MW 2 */
+ 6121 "00001100" // /* MW 1 */
+ 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6125 "10100001" // /* MW 3 */
+ 6126 "11110000" // /* MW 2 */
+ 6127 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6129 "01100001" // /* MW 3 */
+ 6130 "11110000" // /* MW 2 */
+ 6131 "00010010" // /* MW 1 */
+ 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6133 "00000000" // /* MW 1 */
+ 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6135 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6137 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6139 "10100001" // /* MW 3 */
+ 6140 "11110010" // /* MW 2 */
+ 6141 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6143 "00010010" // /* MW 3 */
+ 6144 "01110000" // /* MW 2 */
+ 6145 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6147 "00111101" // /* MW 3 */
+ 6148 "10001000" // /* MW 2 */
+ 6149 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6151 "10010010" // /* MW 3 */
+ 6152 "00000101" // /* MW 2 */
+ 6153 "00011100" // /* MW 1 */
+ 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6155 "00000000" // /* MW 1 */
+ 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6157 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6161 "00000001" // /* MW 3 */
+ 6162 "11100001" // /* MW 2 */
+ 6163 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6165 "00010010" // /* MW 3 */
+ 6166 "01110000" // /* MW 2 */
+ 6167 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6169 "00111101" // /* MW 3 */
+ 6170 "10001000" // /* MW 2 */
+ 6171 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6173 "10010010" // /* MW 3 */
+ 6174 "00000001" // /* MW 2 */
+ 6175 "00011100" // /* MW 1 */
+ 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6177 "00000000" // /* MW 1 */
+ 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6179 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6181 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6183 "01100001" // /* MW 3 */
+ 6184 "11110010" // /* MW 2 */
+ 6185 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6187 "00010010" // /* MW 3 */
+ 6188 "01110000" // /* MW 2 */
+ 6189 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6191 "00111101" // /* MW 3 */
+ 6192 "10000100" // /* MW 2 */
+ 6193 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6195 "10010010" // /* MW 3 */
+ 6196 "00000101" // /* MW 2 */
+ 6197 "00011100" // /* MW 1 */
+ 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6199 "00000000" // /* MW 1 */
+ 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6201 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6203 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6205 "00000001" // /* MW 3 */
+ 6206 "11110010" // /* MW 2 */
+ 6207 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6209 "00010010" // /* MW 3 */
+ 6210 "01110000" // /* MW 2 */
+ 6211 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6213 "00111101" // /* MW 3 */
+ 6214 "10000100" // /* MW 2 */
+ 6215 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6217 "10010010" // /* MW 3 */
+ 6218 "00000001" // /* MW 2 */
+ 6219 "00011100" // /* MW 1 */
+ 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6221 "00000000" // /* MW 1 */
+ 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6223 "00000000" // /* MW 1 */
+ 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6225 "00000000" // /* MW 1 */
+ 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6227 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6229 "00010010" // /* MW 3 */
+ 6230 "01110000" // /* MW 2 */
+ 6231 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6233 "00111101" // /* MW 3 */
+ 6234 "10001000" // /* MW 2 */
+ 6235 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6237 "10010010" // /* MW 3 */
+ 6238 "00010101" // /* MW 2 */
+ 6239 "00011100" // /* MW 1 */
+ 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6241 "00000000" // /* MW 1 */
+ 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6243 "00000000" // /* MW 1 */
+ 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6245 "00000000" // /* MW 1 */
+ 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6247 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6249 "00010010" // /* MW 3 */
+ 6250 "01101000" // /* MW 2 */
+ 6251 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6253 "00111101" // /* MW 3 */
+ 6254 "01000100" // /* MW 2 */
+ 6255 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6257 "10010010" // /* MW 3 */
+ 6258 "00010001" // /* MW 2 */
+ 6259 "00011010" // /* MW 1 */
+ 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6261 "00000000" // /* MW 1 */
+ 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6263 "00000000" // /* MW 1 */
+ 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6265 "00000000" // /* MW 1 */
+ 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6267 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "00010010" // /* MW 3 */
+ 6270 "01101000" // /* MW 2 */
+ 6271 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "00111101" // /* MW 3 */
+ 6274 "00100000" // /* MW 2 */
+ 6275 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "10010010" // /* MW 3 */
+ 6278 "00010101" // /* MW 2 */
+ 6279 "00011001" // /* MW 1 */
+ 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6281 "00000000" // /* MW 1 */
+ 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6283 "00000000" // /* MW 1 */
+ 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6285 "00000000" // /* MW 1 */
+ 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6287 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6289 "00010010" // /* MW 3 */
+ 6290 "01100000" // /* MW 2 */
+ 6291 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6293 "00111101" // /* MW 3 */
+ 6294 "00001100" // /* MW 2 */
+ 6295 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6297 "10010010" // /* MW 3 */
+ 6298 "00010001" // /* MW 2 */
+ 6299 "00011000" // /* MW 1 */
+ 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6301 "00000000" // /* MW 1 */
+ 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6303 "00000000" // /* MW 1 */
+ 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6305 "00000000" // /* MW 1 */
+ 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6307 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 72 first
+.src_ref 7 "accum.hpp" 1108 103 first
+ 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6309 "00010110" // /* MW 3 */
+ 6310 "11000000" // /* MW 2 */
+ 6311 "00001101" // /* MW 1 */
+ 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6313 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 41
+ 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6315 "11111110" // /* MW 3 */
+ 6316 "10000101" // /* MW 2 */
+ 6317 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1289 16 first
+ 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6319 "11001100" // /* MW 3 */
+ 6320 "11010101" // /* MW 2 */
+ 6321 "00011101" // /* MW 1 */
+ 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6323 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98 first
+.src_ref 5 "vector.hpp" 1292 26 first
+ 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6325 "01000001" // /* MW 11 */
+ 6326 "01100101" // /* MW 10 */
+ 6327 "10001011" // /* MW 9 */
+ 6328 "00000011" // /* MW 8 */
+ 6329 "00000000" // /* MW 7 */
+ 6330 "00000000" // /* MW 6 */
+ 6331 "00100000" // /* MW 5 */
+ 6332 "00000000" // /* MW 4 */
+ 6333 "11110000" // /* MW 3 */
+ 6334 "00101100" // /* MW 2 */
+ 6335 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first
+.end_of_loop
+ 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6337 "00000000" // /* MW 15 */
+ 6338 "00000000" // /* MW 14 */
+ 6339 "01111000" // /* MW 13 */
+ 6340 "10100101" // /* MW 12 */
+ 6341 "00000001" // /* MW 11 */
+ 6342 "00000000" // /* MW 10 */
+ 6343 "00000000" // /* MW 9 */
+ 6344 "10000000" // /* MW 8 */
+ 6345 "11101010" // /* MW 7 */
+ 6346 "10001010" // /* MW 6 */
+ 6347 "00100111" // /* MW 5 */
+ 6348 "00000000" // /* MW 4 */
+ 6349 "11110000" // /* MW 3 */
+ 6350 "00101100" // /* MW 2 */
+ 6351 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 6353 "00000000" // /* MW 5 */
+ 6354 "00000000" // /* MW 4 */
+ 6355 "01111000" // /* MW 3 */
+ 6356 "00001100" // /* MW 2 */
+ 6357 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6365 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6367 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "10000000" // /* MW 5 */
+ 6370 "10110100" // /* MW 4 */
+ 6371 "10110000" // /* MW 3 */
+ 6372 "10110100" // /* MW 2 */
+ 6373 "11111111" // /* MW 1 */
+ 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6375 "00000000" // /* MW 9 */
+ 6376 "00000000" // /* MW 8 */
+ 6377 "00000000" // /* MW 7 */
+ 6378 "10000000" // /* MW 6 */
+ 6379 "00111101" // /* MW 5 */
+ 6380 "11111000" // /* MW 4 */
+ 6381 "11110111" // /* MW 3 */
+ 6382 "00101100" // /* MW 2 */
+ 6383 "00000000" // /* MW 1 */
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+ 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6385 "10010001" // /* MW 3 */
+ 6386 "01100001" // /* MW 2 */
+ 6387 "00011111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+ 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6389 "11000001" // /* MW 5 */
+ 6390 "01100100" // /* MW 4 */
+ 6391 "01011011" // /* MW 3 */
+ 6392 "10001111" // /* MW 2 */
+ 6393 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6395 "01100000" // /* MW 3 */
+ 6396 "01111011" // /* MW 2 */
+ 6397 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6399 "01110111" // /* MW 3 */
+ 6400 "00000100" // /* MW 2 */
+ 6401 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 57 first
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 6403 "00000001" // /* MW 5 */
+ 6404 "00000000" // /* MW 4 */
+ 6405 "11111000" // /* MW 3 */
+ 6406 "00010011" // /* MW 2 */
+ 6407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6413 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6415 "00000111" // /* MW 3 */
+ 6416 "11000110" // /* MW 2 */
+ 6417 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.delay_slot
+ 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6419 "01100000" // /* MW 13 */
+ 6420 "00101011" // /* MW 12 */
+ 6421 "00000000" // /* MW 11 */
+ 6422 "10101111" // /* MW 10 */
+ 6423 "00110100" // /* MW 9 */
+ 6424 "00000000" // /* MW 8 */
+ 6425 "10110000" // /* MW 7 */
+ 6426 "11000000" // /* MW 6 */
+ 6427 "00100000" // /* MW 5 */
+ 6428 "00000000" // /* MW 4 */
+ 6429 "11110000" // /* MW 3 */
+ 6430 "00101100" // /* MW 2 */
+ 6431 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+.return_address
+ 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6433 "00111001" // /* MW 3 */
+ 6434 "11111000" // /* MW 2 */
+ 6435 "00000111" // /* MW 1 */
+ 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6437 "10011001" // /* MW 3 */
+ 6438 "11111100" // /* MW 2 */
+ 6439 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 23 first
+ 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6441 "01110111" // /* MW 3 */
+ 6442 "01010100" // /* MW 2 */
+ 6443 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4 first
+ 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6445 "00000001" // /* MW 5 */
+ 6446 "00000000" // /* MW 4 */
+ 6447 "00000000" // /* MW 3 */
+ 6448 "11100000" // /* MW 2 */
+ 6449 "11111111" // /* MW 1 */
+ 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6451 "00000000" // /* MW 1 */
+ 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6453 "00000000" // /* MW 1 */
+ 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6455 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+ 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6457 "00000000" // /* MW 3 */
+ 6458 "00101000" // /* MW 2 */
+ 6459 "00010000" // /* MW 1 */
+.delay_slot
+ 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6461 "11000000" // /* MW 3 */
+ 6462 "01100010" // /* MW 2 */
+ 6463 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6471 "01111110" // /* MW 9 */
+ 6472 "10100101" // /* MW 8 */
+ 6473 "00000001" // /* MW 7 */
+ 6474 "00000000" // /* MW 6 */
+ 6475 "00010000" // /* MW 5 */
+ 6476 "00000000" // /* MW 4 */
+ 6477 "11110000" // /* MW 3 */
+ 6478 "00101100" // /* MW 2 */
+ 6479 "00000000" // /* MW 1 */
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 5 "blend.hpp" 163 48
+ 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 6481 "00100000" // /* MW 9 */
+ 6482 "00000000" // /* MW 8 */
+ 6483 "00000000" // /* MW 7 */
+ 6484 "10111000" // /* MW 6 */
+ 6485 "00000010" // /* MW 5 */
+ 6486 "00000000" // /* MW 4 */
+ 6487 "00000000" // /* MW 3 */
+ 6488 "11110100" // /* MW 2 */
+ 6489 "00011111" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6491 "00000001" // /* MW 3 */
+ 6492 "00101010" // /* MW 2 */
+ 6493 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6501 "10000001" // /* MW 11 */
+ 6502 "10101101" // /* MW 10 */
+ 6503 "00000000" // /* MW 9 */
+ 6504 "00000000" // /* MW 8 */
+ 6505 "00000000" // /* MW 7 */
+ 6506 "00000000" // /* MW 6 */
+ 6507 "00100000" // /* MW 5 */
+ 6508 "00000000" // /* MW 4 */
+ 6509 "11110000" // /* MW 3 */
+ 6510 "00101100" // /* MW 2 */
+ 6511 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6513 "00010101" // /* MW 3 */
+ 6514 "00001010" // /* MW 2 */
+ 6515 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6517 "01100111" // /* MW 3 */
+ 6518 "01001010" // /* MW 2 */
+ 6519 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */
+ 6521 "00000001" // /* MW 5 */
+ 6522 "01000000" // /* MW 4 */
+ 6523 "00110000" // /* MW 3 */
+ 6524 "00001110" // /* MW 2 */
+ 6525 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6535 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6537 "00011001" // /* MW 3 */
+ 6538 "00001110" // /* MW 2 */
+ 6539 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6541 "01100111" // /* MW 3 */
+ 6542 "11001110" // /* MW 2 */
+ 6543 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */
+ 6545 "00000001" // /* MW 5 */
+ 6546 "01000000" // /* MW 4 */
+ 6547 "10101000" // /* MW 3 */
+ 6548 "00001110" // /* MW 2 */
+ 6549 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6551 "01000001" // /* MW 3 */
+ 6552 "00001010" // /* MW 2 */
+ 6553 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6555 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6557 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6559 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6561 "00000000" // /* MW 15 */
+ 6562 "00000000" // /* MW 14 */
+ 6563 "01111000" // /* MW 13 */
+ 6564 "10100101" // /* MW 12 */
+ 6565 "00000001" // /* MW 11 */
+ 6566 "00000000" // /* MW 10 */
+ 6567 "00000000" // /* MW 9 */
+ 6568 "00000000" // /* MW 8 */
+ 6569 "01011011" // /* MW 7 */
+ 6570 "00000001" // /* MW 6 */
+ 6571 "00100000" // /* MW 5 */
+ 6572 "00000000" // /* MW 4 */
+ 6573 "11110000" // /* MW 3 */
+ 6574 "00101100" // /* MW 2 */
+ 6575 "00000000" // /* MW 1 */
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30
+ 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6577 "01111000" // /* MW 9 */
+ 6578 "11110000" // /* MW 8 */
+ 6579 "01100000" // /* MW 7 */
+ 6580 "11101010" // /* MW 6 */
+ 6581 "00010000" // /* MW 5 */
+ 6582 "00000001" // /* MW 4 */
+ 6583 "01010000" // /* MW 3 */
+ 6584 "00011110" // /* MW 2 */
+ 6585 "01001000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first
+ 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6587 "00000101" // /* MW 5 */
+ 6588 "10100100" // /* MW 4 */
+ 6589 "00011000" // /* MW 3 */
+ 6590 "10001101" // /* MW 2 */
+ 6591 "10001001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 202 12
+ 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */
+ 6593 "00000001" // /* MW 5 */
+ 6594 "01000000" // /* MW 4 */
+ 6595 "00100000" // /* MW 3 */
+ 6596 "00001110" // /* MW 2 */
+ 6597 "00110000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6605 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first
+.delay_slot
+ 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6607 "01011110" // /* MW 3 */
+ 6608 "11001010" // /* MW 2 */
+ 6609 "00010001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22
+ 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6611 "00010000" // /* MW 11 */
+ 6612 "00001000" // /* MW 10 */
+ 6613 "01111101" // /* MW 9 */
+ 6614 "00000100" // /* MW 8 */
+ 6615 "00000000" // /* MW 7 */
+ 6616 "00000000" // /* MW 6 */
+ 6617 "10001011" // /* MW 5 */
+ 6618 "10000100" // /* MW 4 */
+ 6619 "10000000" // /* MW 3 */
+ 6620 "10001010" // /* MW 2 */
+ 6621 "00000100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+ 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6623 "00010000" // /* MW 11 */
+ 6624 "00111000" // /* MW 10 */
+ 6625 "10111101" // /* MW 9 */
+ 6626 "00000101" // /* MW 8 */
+ 6627 "00000000" // /* MW 7 */
+ 6628 "10000000" // /* MW 6 */
+ 6629 "10100101" // /* MW 5 */
+ 6630 "11111101" // /* MW 4 */
+ 6631 "11010111" // /* MW 3 */
+ 6632 "00011110" // /* MW 2 */
+ 6633 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+ 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6635 "00010101" // /* MW 3 */
+ 6636 "00011101" // /* MW 2 */
+ 6637 "00000000" // /* MW 1 */
+ 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6639 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+ 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6641 "10010010" // /* MW 3 */
+ 6642 "11000010" // /* MW 2 */
+ 6643 "00011100" // /* MW 1 */
+ 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6645 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 5 "add.hpp" 28 49 first
+ 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6647 "00111101" // /* MW 7 */
+ 6648 "01101000" // /* MW 6 */
+ 6649 "00010001" // /* MW 5 */
+ 6650 "11100110" // /* MW 4 */
+ 6651 "00010010" // /* MW 3 */
+ 6652 "00010011" // /* MW 2 */
+ 6653 "00000011" // /* MW 1 */
+ 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6655 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+ 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6657 "00000000" // /* MW 15 */
+ 6658 "00000000" // /* MW 14 */
+ 6659 "11001000" // /* MW 13 */
+ 6660 "11111111" // /* MW 12 */
+ 6661 "10111001" // /* MW 11 */
+ 6662 "00000010" // /* MW 10 */
+ 6663 "00000000" // /* MW 9 */
+ 6664 "00000000" // /* MW 8 */
+ 6665 "01011011" // /* MW 7 */
+ 6666 "00000001" // /* MW 6 */
+ 6667 "00100000" // /* MW 5 */
+ 6668 "00000000" // /* MW 4 */
+ 6669 "11110000" // /* MW 3 */
+ 6670 "00101100" // /* MW 2 */
+ 6671 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+.begin_of_loop
+.loop_nesting 1
+ 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6673 "00000000" // /* MW 15 */
+ 6674 "00000000" // /* MW 14 */
+ 6675 "01111000" // /* MW 13 */
+ 6676 "10100101" // /* MW 12 */
+ 6677 "00000001" // /* MW 11 */
+ 6678 "00000000" // /* MW 10 */
+ 6679 "00000000" // /* MW 9 */
+ 6680 "00000000" // /* MW 8 */
+ 6681 "01011011" // /* MW 7 */
+ 6682 "00000001" // /* MW 6 */
+ 6683 "00100000" // /* MW 5 */
+ 6684 "00000000" // /* MW 4 */
+ 6685 "10110000" // /* MW 3 */
+ 6686 "10100010" // /* MW 2 */
+ 6687 "00000011" // /* MW 1 */
+ 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6689 "00000000" // /* MW 15 */
+ 6690 "00000000" // /* MW 14 */
+ 6691 "01111000" // /* MW 13 */
+ 6692 "10100101" // /* MW 12 */
+ 6693 "00000001" // /* MW 11 */
+ 6694 "00000000" // /* MW 10 */
+ 6695 "00000000" // /* MW 9 */
+ 6696 "00000000" // /* MW 8 */
+ 6697 "01011011" // /* MW 7 */
+ 6698 "00000001" // /* MW 6 */
+ 6699 "00100000" // /* MW 5 */
+ 6700 "00000000" // /* MW 4 */
+ 6701 "11110000" // /* MW 3 */
+ 6702 "00101100" // /* MW 2 */
+ 6703 "00000000" // /* MW 1 */
+ 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6705 "00000000" // /* MW 15 */
+ 6706 "00000000" // /* MW 14 */
+ 6707 "01111000" // /* MW 13 */
+ 6708 "10100101" // /* MW 12 */
+ 6709 "00000001" // /* MW 11 */
+ 6710 "00000000" // /* MW 10 */
+ 6711 "00000000" // /* MW 9 */
+ 6712 "00000000" // /* MW 8 */
+ 6713 "01011011" // /* MW 7 */
+ 6714 "00000001" // /* MW 6 */
+ 6715 "00100000" // /* MW 5 */
+ 6716 "00000000" // /* MW 4 */
+ 6717 "11110000" // /* MW 3 */
+ 6718 "00101100" // /* MW 2 */
+ 6719 "00000000" // /* MW 1 */
+ 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6721 "00000000" // /* MW 15 */
+ 6722 "00000000" // /* MW 14 */
+ 6723 "01111000" // /* MW 13 */
+ 6724 "10100101" // /* MW 12 */
+ 6725 "00000001" // /* MW 11 */
+ 6726 "00000000" // /* MW 10 */
+ 6727 "00000000" // /* MW 9 */
+ 6728 "00000000" // /* MW 8 */
+ 6729 "01011011" // /* MW 7 */
+ 6730 "00000001" // /* MW 6 */
+ 6731 "00100000" // /* MW 5 */
+ 6732 "00000000" // /* MW 4 */
+ 6733 "11110000" // /* MW 3 */
+ 6734 "00101100" // /* MW 2 */
+ 6735 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6737 "00000000" // /* MW 15 */
+ 6738 "00000000" // /* MW 14 */
+ 6739 "01111000" // /* MW 13 */
+ 6740 "00001001" // /* MW 12 */
+ 6741 "01100010" // /* MW 11 */
+ 6742 "00000010" // /* MW 10 */
+ 6743 "00000000" // /* MW 9 */
+ 6744 "00000000" // /* MW 8 */
+ 6745 "01011011" // /* MW 7 */
+ 6746 "00000001" // /* MW 6 */
+ 6747 "00100000" // /* MW 5 */
+ 6748 "00000000" // /* MW 4 */
+ 6749 "11110000" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "01000001" // /* MW 15 */
+ 6754 "10001011" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "01011011" // /* MW 7 */
+ 6762 "00000001" // /* MW 6 */
+ 6763 "00100000" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.src_ref 7 "accum.hpp" 199 120 first
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6769 "00000000" // /* MW 15 */
+ 6770 "00000000" // /* MW 14 */
+ 6771 "01111000" // /* MW 13 */
+ 6772 "10001001" // /* MW 12 */
+ 6773 "10001001" // /* MW 11 */
+ 6774 "00000001" // /* MW 10 */
+ 6775 "00000000" // /* MW 9 */
+ 6776 "00000000" // /* MW 8 */
+ 6777 "01011011" // /* MW 7 */
+ 6778 "00000001" // /* MW 6 */
+ 6779 "00100000" // /* MW 5 */
+ 6780 "00000000" // /* MW 4 */
+ 6781 "11110000" // /* MW 3 */
+ 6782 "00101100" // /* MW 2 */
+ 6783 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+.loop_nesting 0
+ 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6785 "00010000" // /* MW 9 */
+ 6786 "01111000" // /* MW 8 */
+ 6787 "10110010" // /* MW 7 */
+ 6788 "11110011" // /* MW 6 */
+ 6789 "00000001" // /* MW 5 */
+ 6790 "00000000" // /* MW 4 */
+ 6791 "00000000" // /* MW 3 */
+ 6792 "00010000" // /* MW 2 */
+ 6793 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6795 "01011000" // /* MW 9 */
+ 6796 "00000001" // /* MW 8 */
+ 6797 "10011000" // /* MW 7 */
+ 6798 "00001000" // /* MW 6 */
+ 6799 "01100001" // /* MW 5 */
+ 6800 "00000000" // /* MW 4 */
+ 6801 "01010000" // /* MW 3 */
+ 6802 "10010000" // /* MW 2 */
+ 6803 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6805 "00000101" // /* MW 3 */
+ 6806 "00100010" // /* MW 2 */
+ 6807 "00010000" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6809 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6811 "00010010" // /* MW 3 */
+ 6812 "11000100" // /* MW 2 */
+ 6813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6817 "00010010" // /* MW 3 */
+ 6818 "00110011" // /* MW 2 */
+ 6819 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6821 "00010010" // /* MW 3 */
+ 6822 "00010000" // /* MW 2 */
+ 6823 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 1108 103
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6825 "00111101" // /* MW 9 */
+ 6826 "01000000" // /* MW 8 */
+ 6827 "00010000" // /* MW 7 */
+ 6828 "00101111" // /* MW 6 */
+ 6829 "01001001" // /* MW 5 */
+ 6830 "00000000" // /* MW 4 */
+ 6831 "10000000" // /* MW 3 */
+ 6832 "00111010" // /* MW 2 */
+ 6833 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6835 "00010010" // /* MW 3 */
+ 6836 "00010011" // /* MW 2 */
+ 6837 "00011010" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+ 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6839 "01110010" // /* MW 3 */
+ 6840 "00010110" // /* MW 2 */
+ 6841 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+ 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6843 "10010010" // /* MW 3 */
+ 6844 "00000100" // /* MW 2 */
+ 6845 "00011001" // /* MW 1 */
+ 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6847 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+ 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "00010010" // /* MW 3 */
+ 6850 "00000100" // /* MW 2 */
+ 6851 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "00010010" // /* MW 3 */
+ 6854 "00100000" // /* MW 2 */
+ 6855 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6857 "00111101" // /* MW 7 */
+ 6858 "00001100" // /* MW 6 */
+ 6859 "00010000" // /* MW 5 */
+ 6860 "11000110" // /* MW 4 */
+ 6861 "01000010" // /* MW 3 */
+ 6862 "00010000" // /* MW 2 */
+ 6863 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6865 "10010010" // /* MW 3 */
+ 6866 "00000100" // /* MW 2 */
+ 6867 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6869 "10010010" // /* MW 3 */
+ 6870 "00100000" // /* MW 2 */
+ 6871 "00011001" // /* MW 1 */
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6873 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+ 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6875 "10010110" // /* MW 3 */
+ 6876 "01000000" // /* MW 2 */
+ 6877 "00001000" // /* MW 1 */
+ 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6879 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6881 "10000011" // /* MW 7 */
+ 6882 "01000000" // /* MW 6 */
+ 6883 "00010100" // /* MW 5 */
+ 6884 "11100110" // /* MW 4 */
+ 6885 "00010010" // /* MW 3 */
+ 6886 "10100000" // /* MW 2 */
+ 6887 "00000001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6889 "00111101" // /* MW 7 */
+ 6890 "00001000" // /* MW 6 */
+ 6891 "00010000" // /* MW 5 */
+ 6892 "11000110" // /* MW 4 */
+ 6893 "00011010" // /* MW 3 */
+ 6894 "10011000" // /* MW 2 */
+ 6895 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6897 "10010010" // /* MW 3 */
+ 6898 "00000110" // /* MW 2 */
+ 6899 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6901 "10010010" // /* MW 3 */
+ 6902 "10100100" // /* MW 2 */
+ 6903 "00011001" // /* MW 1 */
+ 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6907 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6909 "00010110" // /* MW 3 */
+ 6910 "01000010" // /* MW 2 */
+ 6911 "00001001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6913 "00010010" // /* MW 3 */
+ 6914 "10100000" // /* MW 2 */
+ 6915 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6917 "00111101" // /* MW 7 */
+ 6918 "00001000" // /* MW 6 */
+ 6919 "00010000" // /* MW 5 */
+ 6920 "11000110" // /* MW 4 */
+ 6921 "00000010" // /* MW 3 */
+ 6922 "00101000" // /* MW 2 */
+ 6923 "00000011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10010010" // /* MW 3 */
+ 6926 "00001100" // /* MW 2 */
+ 6927 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "10010010" // /* MW 3 */
+ 6930 "10100110" // /* MW 2 */
+ 6931 "00011010" // /* MW 1 */
+ 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6933 "00000000" // /* MW 1 */
+ 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6935 "00000000" // /* MW 1 */
+ 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6937 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6939 "00010010" // /* MW 3 */
+ 6940 "00100000" // /* MW 2 */
+ 6941 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6943 "00000001" // /* MW 3 */
+ 6944 "00011010" // /* MW 2 */
+ 6945 "00011000" // /* MW 1 */
+ 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6947 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 856 23 first
+ 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6949 "00010001" // /* MW 3 */
+ 6950 "00000000" // /* MW 2 */
+ 6951 "00011011" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6953 "00001000" // /* MW 3 */
+ 6954 "10001011" // /* MW 2 */
+ 6955 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6957 "10010010" // /* MW 3 */
+ 6958 "00000010" // /* MW 2 */
+ 6959 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6961 "10010010" // /* MW 3 */
+ 6962 "10101010" // /* MW 2 */
+ 6963 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6965 "01110000" // /* MW 7 */
+ 6966 "01001001" // /* MW 6 */
+ 6967 "10010001" // /* MW 5 */
+ 6968 "00000001" // /* MW 4 */
+ 6969 "11000000" // /* MW 3 */
+ 6970 "00100010" // /* MW 2 */
+ 6971 "01011000" // /* MW 1 */
+ 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6973 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6975 "10000011" // /* MW 3 */
+ 6976 "01001010" // /* MW 2 */
+ 6977 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+ 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6979 "01000001" // /* MW 3 */
+ 6980 "11101010" // /* MW 2 */
+ 6981 "00010000" // /* MW 1 */
+ 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6983 "00000000" // /* MW 1 */
+ 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6985 "00000000" // /* MW 1 */
+ 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6987 "00000000" // /* MW 1 */
+ 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6989 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6991 "10010110" // /* MW 3 */
+ 6992 "11000000" // /* MW 2 */
+ 6993 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6995 "10000011" // /* MW 3 */
+ 6996 "10000100" // /* MW 2 */
+ 6997 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6999 "10000011" // /* MW 3 */
+ 7000 "00100010" // /* MW 2 */
+ 7001 "00010011" // /* MW 1 */
+ 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7003 "00000000" // /* MW 1 */
+ 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7005 "00000000" // /* MW 1 */
+ 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7007 "00000000" // /* MW 1 */
+ 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7009 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7011 "00010110" // /* MW 3 */
+ 7012 "11000010" // /* MW 2 */
+ 7013 "00001001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7015 "10010110" // /* MW 3 */
+ 7016 "01000001" // /* MW 2 */
+ 7017 "00001011" // /* MW 1 */
+ 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7019 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7021 "01100001" // /* MW 3 */
+ 7022 "11101100" // /* MW 2 */
+ 7023 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7025 "01000001" // /* MW 3 */
+ 7026 "11101100" // /* MW 2 */
+ 7027 "00010011" // /* MW 1 */
+ 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7029 "00000000" // /* MW 1 */
+ 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7031 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7033 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7035 "01100001" // /* MW 3 */
+ 7036 "11100010" // /* MW 2 */
+ 7037 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "00010010" // /* MW 3 */
+ 7040 "01101000" // /* MW 2 */
+ 7041 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7043 "00111101" // /* MW 3 */
+ 7044 "01001100" // /* MW 2 */
+ 7045 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7047 "10010010" // /* MW 3 */
+ 7048 "00000101" // /* MW 2 */
+ 7049 "00011010" // /* MW 1 */
+ 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7051 "00000000" // /* MW 1 */
+ 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7053 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7055 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7057 "01100001" // /* MW 3 */
+ 7058 "11101010" // /* MW 2 */
+ 7059 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00010010" // /* MW 3 */
+ 7062 "01101000" // /* MW 2 */
+ 7063 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7065 "00111101" // /* MW 3 */
+ 7066 "01001100" // /* MW 2 */
+ 7067 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7069 "10010010" // /* MW 3 */
+ 7070 "00000001" // /* MW 2 */
+ 7071 "00011010" // /* MW 1 */
+ 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7073 "00000000" // /* MW 1 */
+ 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7075 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7077 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7079 "01000001" // /* MW 3 */
+ 7080 "11100010" // /* MW 2 */
+ 7081 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7083 "00010010" // /* MW 3 */
+ 7084 "01101000" // /* MW 2 */
+ 7085 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7087 "00111101" // /* MW 3 */
+ 7088 "01001100" // /* MW 2 */
+ 7089 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7091 "10010010" // /* MW 3 */
+ 7092 "00000101" // /* MW 2 */
+ 7093 "00011010" // /* MW 1 */
+ 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7095 "00000000" // /* MW 1 */
+ 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7097 "00000000" // /* MW 1 */
+ 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7099 "00000000" // /* MW 1 */
+ 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7101 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7103 "00010010" // /* MW 3 */
+ 7104 "01101000" // /* MW 2 */
+ 7105 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7107 "00111101" // /* MW 3 */
+ 7108 "01001100" // /* MW 2 */
+ 7109 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7111 "10010010" // /* MW 3 */
+ 7112 "00000001" // /* MW 2 */
+ 7113 "00011010" // /* MW 1 */
+ 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7115 "00000000" // /* MW 1 */
+ 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+ 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "11000001" // /* MW 3 */
+ 7120 "11100000" // /* MW 2 */
+ 7121 "00010011" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7125 "00010010" // /* MW 3 */
+ 7126 "01101000" // /* MW 2 */
+ 7127 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7129 "00111101" // /* MW 3 */
+ 7130 "01001100" // /* MW 2 */
+ 7131 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7133 "10010010" // /* MW 3 */
+ 7134 "00000101" // /* MW 2 */
+ 7135 "00011010" // /* MW 1 */
+ 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7137 "00000000" // /* MW 1 */
+ 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7139 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+ 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7141 "00000001" // /* MW 3 */
+ 7142 "11100010" // /* MW 2 */
+ 7143 "00010001" // /* MW 1 */
+ 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7145 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00010010" // /* MW 3 */
+ 7148 "01101100" // /* MW 2 */
+ 7149 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "00111101" // /* MW 3 */
+ 7152 "01000100" // /* MW 2 */
+ 7153 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "10010010" // /* MW 3 */
+ 7156 "00000001" // /* MW 2 */
+ 7157 "00011010" // /* MW 1 */
+ 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7159 "00000000" // /* MW 1 */
+ 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7161 "00000000" // /* MW 1 */
+ 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7163 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7165 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7167 "00111101" // /* MW 7 */
+ 7168 "01000000" // /* MW 6 */
+ 7169 "00010000" // /* MW 5 */
+ 7170 "11100110" // /* MW 4 */
+ 7171 "00010010" // /* MW 3 */
+ 7172 "00100100" // /* MW 2 */
+ 7173 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7175 "00000001" // /* MW 7 */
+ 7176 "11101010" // /* MW 6 */
+ 7177 "00010100" // /* MW 5 */
+ 7178 "11100110" // /* MW 4 */
+ 7179 "10010010" // /* MW 3 */
+ 7180 "00000000" // /* MW 2 */
+ 7181 "00000010" // /* MW 1 */
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00111101" // /* MW 3 */
+ 7190 "01010000" // /* MW 2 */
+ 7191 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010010" // /* MW 3 */
+ 7194 "00000000" // /* MW 2 */
+ 7195 "00011010" // /* MW 1 */
+ 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7197 "00000000" // /* MW 1 */
+ 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 7199 "00000000" // /* MW 5 */
+ 7200 "00000000" // /* MW 4 */
+ 7201 "01111000" // /* MW 3 */
+ 7202 "00001100" // /* MW 2 */
+ 7203 "00000000" // /* MW 1 */
+.delay_slot
+ 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7205 "01100101" // /* MW 3 */
+ 7206 "11111010" // /* MW 2 */
+ 7207 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7209 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.delay_slot
+ 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7211 "00010010" // /* MW 3 */
+ 7212 "00000000" // /* MW 2 */
+ 7213 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7215 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 1108 103 first
+.delay_slot
+ 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7217 "00000000" // /* MW 15 */
+ 7218 "00000000" // /* MW 14 */
+ 7219 "01111000" // /* MW 13 */
+ 7220 "10100101" // /* MW 12 */
+ 7221 "00000001" // /* MW 11 */
+ 7222 "00000000" // /* MW 10 */
+ 7223 "00000000" // /* MW 9 */
+ 7224 "10000000" // /* MW 8 */
+ 7225 "00010010" // /* MW 7 */
+ 7226 "00000101" // /* MW 6 */
+ 7227 "00100001" // /* MW 5 */
+ 7228 "00000000" // /* MW 4 */
+ 7229 "11110000" // /* MW 3 */
+ 7230 "00101100" // /* MW 2 */
+ 7231 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+.src_ref 5 "blend.hpp" 163 48
+ 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */
+ 7233 "00100000" // /* MW 9 */
+ 7234 "00000000" // /* MW 8 */
+ 7235 "00000000" // /* MW 7 */
+ 7236 "10111110" // /* MW 6 */
+ 7237 "00000010" // /* MW 5 */
+ 7238 "00000000" // /* MW 4 */
+ 7239 "00000000" // /* MW 3 */
+ 7240 "00010100" // /* MW 2 */
+ 7241 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7243 "00000001" // /* MW 3 */
+ 7244 "00101010" // /* MW 2 */
+ 7245 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7247 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7253 "10000001" // /* MW 11 */
+ 7254 "10101101" // /* MW 10 */
+ 7255 "00000000" // /* MW 9 */
+ 7256 "00000000" // /* MW 8 */
+ 7257 "00000000" // /* MW 7 */
+ 7258 "00000000" // /* MW 6 */
+ 7259 "00100000" // /* MW 5 */
+ 7260 "00000000" // /* MW 4 */
+ 7261 "11110000" // /* MW 3 */
+ 7262 "00101100" // /* MW 2 */
+ 7263 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7265 "00000000" // /* MW 5 */
+ 7266 "00000000" // /* MW 4 */
+ 7267 "10010000" // /* MW 3 */
+ 7268 "00001110" // /* MW 2 */
+ 7269 "00000000" // /* MW 1 */
+.delay_slot
+ 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7271 "01110000" // /* MW 7 */
+ 7272 "11110000" // /* MW 6 */
+ 7273 "01100000" // /* MW 5 */
+ 7274 "00000010" // /* MW 4 */
+ 7275 "10110000" // /* MW 3 */
+ 7276 "10010011" // /* MW 2 */
+ 7277 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7279 "00000000" // /* MW 1 */
+.delay_slot
+ 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7281 "00110011" // /* MW 3 */
+ 7282 "11110000" // /* MW 2 */
+ 7283 "00001111" // /* MW 1 */
+.delay_slot
+ 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7285 "00110011" // /* MW 3 */
+ 7286 "11110101" // /* MW 2 */
+ 7287 "00001111" // /* MW 1 */
+.delay_slot
+ 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7289 "01110000" // /* MW 7 */
+ 7290 "10100101" // /* MW 6 */
+ 7291 "00000001" // /* MW 5 */
+ 7292 "00000000" // /* MW 4 */
+ 7293 "01100000" // /* MW 3 */
+ 7294 "00001110" // /* MW 2 */
+ 7295 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7297 "00000101" // /* MW 3 */
+ 7298 "00100010" // /* MW 2 */
+ 7299 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7301 "01100111" // /* MW 3 */
+ 7302 "01100010" // /* MW 2 */
+ 7303 "00010100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */
+ 7305 "00000001" // /* MW 5 */
+ 7306 "01000000" // /* MW 4 */
+ 7307 "10010000" // /* MW 3 */
+ 7308 "00001110" // /* MW 2 */
+ 7309 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+.delay_slot
+ 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7313 "00110011" // /* MW 3 */
+ 7314 "11110000" // /* MW 2 */
+ 7315 "00001111" // /* MW 1 */
+.delay_slot
+ 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7317 "00110011" // /* MW 3 */
+ 7318 "11110101" // /* MW 2 */
+ 7319 "00001111" // /* MW 1 */
+.delay_slot
+ 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "01110011" // /* MW 3 */
+ 7322 "11111000" // /* MW 2 */
+ 7323 "00001111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7325 "01111001" // /* MW 9 */
+ 7326 "11110000" // /* MW 8 */
+ 7327 "01100000" // /* MW 7 */
+ 7328 "01001010" // /* MW 6 */
+ 7329 "01110000" // /* MW 5 */
+ 7330 "00000000" // /* MW 4 */
+ 7331 "10110000" // /* MW 3 */
+ 7332 "10010011" // /* MW 2 */
+ 7333 "11111111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7335 "01100111" // /* MW 3 */
+ 7336 "11001110" // /* MW 2 */
+ 7337 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7339 "00000001" // /* MW 5 */
+ 7340 "01000000" // /* MW 4 */
+ 7341 "10000000" // /* MW 3 */
+ 7342 "00001110" // /* MW 2 */
+ 7343 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7345 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7347 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7349 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7351 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7353 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7355 "01100111" // /* MW 3 */
+ 7356 "01001110" // /* MW 2 */
+ 7357 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */
+ 7359 "00000001" // /* MW 5 */
+ 7360 "01000000" // /* MW 4 */
+ 7361 "01110000" // /* MW 3 */
+ 7362 "00001110" // /* MW 2 */
+ 7363 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7365 "01000001" // /* MW 3 */
+ 7366 "00001010" // /* MW 2 */
+ 7367 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7369 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7371 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7375 "00000000" // /* MW 1 */
+ 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */
+ 7377 "00000000" // /* MW 5 */
+ 7378 "00000000" // /* MW 4 */
+ 7379 "11011000" // /* MW 3 */
+ 7380 "00001100" // /* MW 2 */
+ 7381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7391 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+.src_ref 5 "blend.hpp" 170 36
+ 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7393 "00100000" // /* MW 9 */
+ 7394 "00000000" // /* MW 8 */
+ 7395 "00000000" // /* MW 7 */
+ 7396 "10111000" // /* MW 6 */
+ 7397 "00000010" // /* MW 5 */
+ 7398 "00000000" // /* MW 4 */
+ 7399 "00000000" // /* MW 3 */
+ 7400 "00110001" // /* MW 2 */
+ 7401 "00100000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7403 "01000001" // /* MW 5 */
+ 7404 "00000000" // /* MW 4 */
+ 7405 "00101000" // /* MW 3 */
+ 7406 "01000000" // /* MW 2 */
+ 7407 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00000001" // /* MW 3 */
+ 7410 "00101000" // /* MW 2 */
+ 7411 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7417 "00011100" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00000000" // /* MW 5 */
+ 7420 "00000100" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7425 "00000000" // /* MW 5 */
+ 7426 "00000000" // /* MW 4 */
+ 7427 "10101000" // /* MW 3 */
+ 7428 "00001100" // /* MW 2 */
+ 7429 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7431 "11111110" // /* MW 5 */
+ 7432 "10111111" // /* MW 4 */
+ 7433 "11111000" // /* MW 3 */
+ 7434 "00000000" // /* MW 2 */
+ 7435 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7437 "00100000" // /* MW 3 */
+ 7438 "00000000" // /* MW 2 */
+ 7439 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7445 "10000001" // /* MW 11 */
+ 7446 "10101101" // /* MW 10 */
+ 7447 "00000000" // /* MW 9 */
+ 7448 "00000000" // /* MW 8 */
+ 7449 "00000000" // /* MW 7 */
+ 7450 "00000000" // /* MW 6 */
+ 7451 "00100000" // /* MW 5 */
+ 7452 "00000000" // /* MW 4 */
+ 7453 "11110000" // /* MW 3 */
+ 7454 "00101100" // /* MW 2 */
+ 7455 "00000000" // /* MW 1 */
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7457 "00100000" // /* MW 9 */
+ 7458 "00000000" // /* MW 8 */
+ 7459 "00000000" // /* MW 7 */
+ 7460 "10111000" // /* MW 6 */
+ 7461 "00000010" // /* MW 5 */
+ 7462 "00000000" // /* MW 4 */
+ 7463 "01110000" // /* MW 3 */
+ 7464 "00000111" // /* MW 2 */
+ 7465 "11111110" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7467 "01011000" // /* MW 9 */
+ 7468 "00000000" // /* MW 8 */
+ 7469 "10001000" // /* MW 7 */
+ 7470 "10001010" // /* MW 6 */
+ 7471 "00000000" // /* MW 5 */
+ 7472 "00000000" // /* MW 4 */
+ 7473 "01110000" // /* MW 3 */
+ 7474 "10100111" // /* MW 2 */
+ 7475 "11111110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7477 "10010000" // /* MW 9 */
+ 7478 "11111111" // /* MW 8 */
+ 7479 "00001111" // /* MW 7 */
+ 7480 "00111110" // /* MW 6 */
+ 7481 "00000000" // /* MW 5 */
+ 7482 "00000000" // /* MW 4 */
+ 7483 "00100000" // /* MW 3 */
+ 7484 "10010011" // /* MW 2 */
+ 7485 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7487 "10000001" // /* MW 5 */
+ 7488 "00000000" // /* MW 4 */
+ 7489 "00101000" // /* MW 3 */
+ 7490 "01000000" // /* MW 2 */
+ 7491 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7493 "00000101" // /* MW 3 */
+ 7494 "00100010" // /* MW 2 */
+ 7495 "00010001" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7497 "00011100" // /* MW 7 */
+ 7498 "00000000" // /* MW 6 */
+ 7499 "00000000" // /* MW 5 */
+ 7500 "00000100" // /* MW 4 */
+ 7501 "01110000" // /* MW 3 */
+ 7502 "00001111" // /* MW 2 */
+ 7503 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7505 "00000000" // /* MW 5 */
+ 7506 "00000000" // /* MW 4 */
+ 7507 "10101000" // /* MW 3 */
+ 7508 "00001100" // /* MW 2 */
+ 7509 "00000000" // /* MW 1 */
+.delay_slot
+ 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7511 "11100000" // /* MW 3 */
+ 7512 "11000001" // /* MW 2 */
+ 7513 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7515 "11111110" // /* MW 5 */
+ 7516 "10111111" // /* MW 4 */
+ 7517 "11111000" // /* MW 3 */
+ 7518 "00000000" // /* MW 2 */
+ 7519 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7521 "00100000" // /* MW 3 */
+ 7522 "00000000" // /* MW 2 */
+ 7523 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+ 7527 "00000000" // /* MW 1 */
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 472
+.src_ref 8 "superkernels.cpp" 472 first
+.function_start
+ 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7537 "00000001" // /* MW 5 */
+ 7538 "00000000" // /* MW 4 */
+ 7539 "00000000" // /* MW 3 */
+ 7540 "00010000" // /* MW 2 */
+ 7541 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7543 "00010001" // /* MW 9 */
+ 7544 "01100000" // /* MW 8 */
+ 7545 "10110010" // /* MW 7 */
+ 7546 "11110011" // /* MW 6 */
+ 7547 "00000001" // /* MW 5 */
+ 7548 "00000000" // /* MW 4 */
+ 7549 "10110000" // /* MW 3 */
+ 7550 "11110011" // /* MW 2 */
+ 7551 "11111101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7553 "01110010" // /* MW 9 */
+ 7554 "01110000" // /* MW 8 */
+ 7555 "00001101" // /* MW 7 */
+ 7556 "10000010" // /* MW 6 */
+ 7557 "00011101" // /* MW 5 */
+ 7558 "11100111" // /* MW 4 */
+ 7559 "11010111" // /* MW 3 */
+ 7560 "11000010" // /* MW 2 */
+ 7561 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 22 first
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7563 "01111001" // /* MW 9 */
+ 7564 "11110000" // /* MW 8 */
+ 7565 "01101000" // /* MW 7 */
+ 7566 "10000001" // /* MW 6 */
+ 7567 "00000100" // /* MW 5 */
+ 7568 "00100001" // /* MW 4 */
+ 7569 "10110000" // /* MW 3 */
+ 7570 "00101110" // /* MW 2 */
+ 7571 "11111111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 30
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7573 "11110110" // /* MW 5 */
+ 7574 "01000111" // /* MW 4 */
+ 7575 "10111000" // /* MW 3 */
+ 7576 "00111110" // /* MW 2 */
+ 7577 "11111110" // /* MW 1 */
+ 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7579 "10110101" // /* MW 3 */
+ 7580 "11101001" // /* MW 2 */
+ 7581 "00001111" // /* MW 1 */
+ 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7583 "00000000" // /* MW 1 */
+ 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7585 "00000000" // /* MW 1 */
+ 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7587 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.src_ref 8 "superkernels.cpp" 477 16 first
+ 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */
+ 7589 "00000001" // /* MW 5 */
+ 7590 "01000000" // /* MW 4 */
+ 7591 "11110000" // /* MW 3 */
+ 7592 "00001111" // /* MW 2 */
+ 7593 "10000000" // /* MW 1 */
+.delay_slot
+ 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7595 "10010101" // /* MW 3 */
+ 7596 "11111101" // /* MW 2 */
+ 7597 "00001111" // /* MW 1 */
+.delay_slot
+ 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7599 "11010101" // /* MW 3 */
+ 7600 "11110101" // /* MW 2 */
+ 7601 "00001111" // /* MW 1 */
+.delay_slot
+ 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7603 "00011101" // /* MW 3 */
+ 7604 "11100000" // /* MW 2 */
+ 7605 "00001111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11
+.delay_slot
+ 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7607 "10010000" // /* MW 5 */
+ 7608 "11001001" // /* MW 4 */
+ 7609 "11001100" // /* MW 3 */
+ 7610 "00000111" // /* MW 2 */
+ 7611 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11 first
+.delay_slot
+ 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7613 "00110001" // /* MW 3 */
+ 7614 "00000110" // /* MW 2 */
+ 7615 "00001110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 5 "tile.hpp" 74 8
+.src_ref 5 "tile.hpp" 74 8
+ 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7617 "00010000" // /* MW 11 */
+ 7618 "01110110" // /* MW 10 */
+ 7619 "00110010" // /* MW 9 */
+ 7620 "11110001" // /* MW 8 */
+ 7621 "00000001" // /* MW 7 */
+ 7622 "00000000" // /* MW 6 */
+ 7623 "10001011" // /* MW 5 */
+ 7624 "10001000" // /* MW 4 */
+ 7625 "00000111" // /* MW 3 */
+ 7626 "00110001" // /* MW 2 */
+ 7627 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 74 8 first
+.src_ref 5 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7629 "00010001" // /* MW 9 */
+ 7630 "01111000" // /* MW 8 */
+ 7631 "00110010" // /* MW 7 */
+ 7632 "11110001" // /* MW 6 */
+ 7633 "00000001" // /* MW 5 */
+ 7634 "00000000" // /* MW 4 */
+ 7635 "00110000" // /* MW 3 */
+ 7636 "11000110" // /* MW 2 */
+ 7637 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+.src_ref 5 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7639 "10000001" // /* MW 5 */
+ 7640 "11000101" // /* MW 4 */
+ 7641 "11101100" // /* MW 3 */
+ 7642 "11000000" // /* MW 2 */
+ 7643 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */
+ 7645 "00000001" // /* MW 5 */
+ 7646 "00000000" // /* MW 4 */
+ 7647 "00001000" // /* MW 3 */
+ 7648 "00000101" // /* MW 2 */
+ 7649 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7651 "10000000" // /* MW 5 */
+ 7652 "11001000" // /* MW 4 */
+ 7653 "11000000" // /* MW 3 */
+ 7654 "00000111" // /* MW 2 */
+ 7655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7659 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7661 "00110001" // /* MW 3 */
+ 7662 "00100000" // /* MW 2 */
+ 7663 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7665 "00000000" // /* MW 15 */
+ 7666 "00000000" // /* MW 14 */
+ 7667 "01111000" // /* MW 13 */
+ 7668 "10100101" // /* MW 12 */
+ 7669 "00000001" // /* MW 11 */
+ 7670 "00000000" // /* MW 10 */
+ 7671 "00000000" // /* MW 9 */
+ 7672 "00000000" // /* MW 8 */
+ 7673 "01011011" // /* MW 7 */
+ 7674 "00000001" // /* MW 6 */
+ 7675 "00100000" // /* MW 5 */
+ 7676 "00000000" // /* MW 4 */
+ 7677 "11110000" // /* MW 3 */
+ 7678 "00101100" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 51
+.src_ref 8 "superkernels.cpp" 487 47
+.return_address
+ 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7681 "00010000" // /* MW 9 */
+ 7682 "00100000" // /* MW 8 */
+ 7683 "00110010" // /* MW 7 */
+ 7684 "11110001" // /* MW 6 */
+ 7685 "00000001" // /* MW 5 */
+ 7686 "00000000" // /* MW 4 */
+ 7687 "00000000" // /* MW 3 */
+ 7688 "00010001" // /* MW 2 */
+ 7689 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 51 first
+ 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7691 "00010000" // /* MW 9 */
+ 7692 "01100100" // /* MW 8 */
+ 7693 "00110010" // /* MW 7 */
+ 7694 "11110001" // /* MW 6 */
+ 7695 "00000001" // /* MW 5 */
+ 7696 "00000000" // /* MW 4 */
+ 7697 "11010000" // /* MW 3 */
+ 7698 "10111010" // /* MW 2 */
+ 7699 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 85
+ 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7701 "00010000" // /* MW 9 */
+ 7702 "00100010" // /* MW 8 */
+ 7703 "00110010" // /* MW 7 */
+ 7704 "11110001" // /* MW 6 */
+ 7705 "00000001" // /* MW 5 */
+ 7706 "00000000" // /* MW 4 */
+ 7707 "11010000" // /* MW 3 */
+ 7708 "11001010" // /* MW 2 */
+ 7709 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 85
+.src_ref 8 "superkernels.cpp" 482 16
+ 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7711 "00010000" // /* MW 9 */
+ 7712 "01101000" // /* MW 8 */
+ 7713 "10110010" // /* MW 7 */
+ 7714 "11110001" // /* MW 6 */
+ 7715 "00000001" // /* MW 5 */
+ 7716 "00000000" // /* MW 4 */
+ 7717 "11010000" // /* MW 3 */
+ 7718 "10110110" // /* MW 2 */
+ 7719 "01000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+.src_ref 8 "superkernels.cpp" 482 40 first
+ 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7721 "00010000" // /* MW 9 */
+ 7722 "01100110" // /* MW 8 */
+ 7723 "10110010" // /* MW 7 */
+ 7724 "11110000" // /* MW 6 */
+ 7725 "00000001" // /* MW 5 */
+ 7726 "00000000" // /* MW 4 */
+ 7727 "11010000" // /* MW 3 */
+ 7728 "10000101" // /* MW 2 */
+ 7729 "01000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 120 first
+.src_ref 8 "superkernels.cpp" 483 44
+ 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7731 "10000001" // /* MW 5 */
+ 7732 "00111001" // /* MW 4 */
+ 7733 "11011000" // /* MW 3 */
+ 7734 "10111110" // /* MW 2 */
+ 7735 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+ 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7737 "00010100" // /* MW 3 */
+ 7738 "01101000" // /* MW 2 */
+ 7739 "00011010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7741 "00000000" // /* MW 5 */
+ 7742 "11001010" // /* MW 4 */
+ 7743 "11001100" // /* MW 3 */
+ 7744 "00000111" // /* MW 2 */
+ 7745 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13
+ 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7747 "11010000" // /* MW 5 */
+ 7748 "11001001" // /* MW 4 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "00000111" // /* MW 2 */
+ 7751 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 27
+ 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7753 "00101111" // /* MW 3 */
+ 7754 "10100101" // /* MW 2 */
+ 7755 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7757 "00000000" // /* MW 5 */
+ 7758 "00100000" // /* MW 4 */
+ 7759 "00001000" // /* MW 3 */
+ 7760 "00000000" // /* MW 2 */
+ 7761 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 61
+.src_ref 8 "superkernels.cpp" 482 16 first
+ 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7763 "01011111" // /* MW 5 */
+ 7764 "11001010" // /* MW 4 */
+ 7765 "00110110" // /* MW 3 */
+ 7766 "10000101" // /* MW 2 */
+ 7767 "01100000" // /* MW 1 */
+ 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7769 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 96 first
+ 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7771 "00101111" // /* MW 3 */
+ 7772 "11100101" // /* MW 2 */
+ 7773 "00010011" // /* MW 1 */
+ 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7775 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+ 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7777 "01010001" // /* MW 3 */
+ 7778 "00000110" // /* MW 2 */
+ 7779 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 15 first
+ 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7781 "00101110" // /* MW 3 */
+ 7782 "01001100" // /* MW 2 */
+ 7783 "00000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7785 "00110001" // /* MW 3 */
+ 7786 "00011110" // /* MW 2 */
+ 7787 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7789 "00110001" // /* MW 3 */
+ 7790 "00011110" // /* MW 2 */
+ 7791 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7793 "00110001" // /* MW 3 */
+ 7794 "00011110" // /* MW 2 */
+ 7795 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7797 "00110001" // /* MW 3 */
+ 7798 "00011110" // /* MW 2 */
+ 7799 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7801 "00110001" // /* MW 3 */
+ 7802 "00011110" // /* MW 2 */
+ 7803 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7805 "00110001" // /* MW 3 */
+ 7806 "00011110" // /* MW 2 */
+ 7807 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13 first
+ 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7809 "00101001" // /* MW 3 */
+ 7810 "00000100" // /* MW 2 */
+ 7811 "00001000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7813 "00110001" // /* MW 3 */
+ 7814 "00011110" // /* MW 2 */
+ 7815 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7817 "00110001" // /* MW 3 */
+ 7818 "00011110" // /* MW 2 */
+ 7819 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7821 "00110001" // /* MW 3 */
+ 7822 "00011110" // /* MW 2 */
+ 7823 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40 first
+ 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7825 "00110110" // /* MW 3 */
+ 7826 "11011100" // /* MW 2 */
+ 7827 "00000010" // /* MW 1 */
+ 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7829 "00000000" // /* MW 1 */
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+ 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7837 "00000000" // /* MW 1 */
+ 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7839 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7841 "00001011" // /* MW 3 */
+ 7842 "01100011" // /* MW 2 */
+ 7843 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */
+ 7845 "00000001" // /* MW 5 */
+ 7846 "01000000" // /* MW 4 */
+ 7847 "01111000" // /* MW 3 */
+ 7848 "00001111" // /* MW 2 */
+ 7849 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7851 "11000000" // /* MW 3 */
+ 7852 "00011110" // /* MW 2 */
+ 7853 "00011011" // /* MW 1 */
+.delay_slot
+ 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7855 "00011101" // /* MW 3 */
+ 7856 "11011001" // /* MW 2 */
+ 7857 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7863 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7865 "00000001" // /* MW 5 */
+ 7866 "00000000" // /* MW 4 */
+ 7867 "01010000" // /* MW 3 */
+ 7868 "00010101" // /* MW 2 */
+ 7869 "00000000" // /* MW 1 */
+.delay_slot
+ 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7871 "10010101" // /* MW 3 */
+ 7872 "11011101" // /* MW 2 */
+ 7873 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7881 "00011100" // /* MW 7 */
+ 7882 "00000000" // /* MW 6 */
+ 7883 "00000000" // /* MW 5 */
+ 7884 "00000100" // /* MW 4 */
+ 7885 "11110000" // /* MW 3 */
+ 7886 "00101100" // /* MW 2 */
+ 7887 "00000000" // /* MW 1 */
+.return_address
+ 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */
+ 7889 "00000000" // /* MW 5 */
+ 7890 "00000000" // /* MW 4 */
+ 7891 "10011000" // /* MW 3 */
+ 7892 "00001111" // /* MW 2 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7895 "11100000" // /* MW 5 */
+ 7896 "11001001" // /* MW 4 */
+ 7897 "11001110" // /* MW 3 */
+ 7898 "00000111" // /* MW 2 */
+ 7899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7907 "00011100" // /* MW 13 */
+ 7908 "00000000" // /* MW 12 */
+ 7909 "00000000" // /* MW 11 */
+ 7910 "01010111" // /* MW 10 */
+ 7911 "00011010" // /* MW 9 */
+ 7912 "01000000" // /* MW 8 */
+ 7913 "00000000" // /* MW 7 */
+ 7914 "00000000" // /* MW 6 */
+ 7915 "10110110" // /* MW 5 */
+ 7916 "00000010" // /* MW 4 */
+ 7917 "11110000" // /* MW 3 */
+ 7918 "00101100" // /* MW 2 */
+ 7919 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7921 "00000001" // /* MW 5 */
+ 7922 "00000000" // /* MW 4 */
+ 7923 "01010000" // /* MW 3 */
+ 7924 "00010101" // /* MW 2 */
+ 7925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7933 "01100111" // /* MW 3 */
+ 7934 "00000001" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7937 "00000000" // /* MW 15 */
+ 7938 "00000000" // /* MW 14 */
+ 7939 "01111000" // /* MW 13 */
+ 7940 "10100101" // /* MW 12 */
+ 7941 "00000001" // /* MW 11 */
+ 7942 "00001100" // /* MW 10 */
+ 7943 "00011000" // /* MW 9 */
+ 7944 "00000010" // /* MW 8 */
+ 7945 "01011011" // /* MW 7 */
+ 7946 "00000001" // /* MW 6 */
+ 7947 "00100000" // /* MW 5 */
+ 7948 "00000000" // /* MW 4 */
+ 7949 "11110000" // /* MW 3 */
+ 7950 "00101100" // /* MW 2 */
+ 7951 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+.no_stack_arguments
+ 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */
+ 7953 "00000001" // /* MW 5 */
+ 7954 "00000000" // /* MW 4 */
+ 7955 "01000000" // /* MW 3 */
+ 7956 "00011000" // /* MW 2 */
+ 7957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7959 "00100000" // /* MW 3 */
+ 7960 "01010000" // /* MW 2 */
+ 7961 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7963 "11100000" // /* MW 5 */
+ 7964 "11001001" // /* MW 4 */
+ 7965 "11001110" // /* MW 3 */
+ 7966 "00000111" // /* MW 2 */
+ 7967 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7969 "00000000" // /* MW 5 */
+ 7970 "00100000" // /* MW 4 */
+ 7971 "00000001" // /* MW 3 */
+ 7972 "00000000" // /* MW 2 */
+ 7973 "01001111" // /* MW 1 */
+.delay_slot
+ 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "10010101" // /* MW 3 */
+ 7976 "11011101" // /* MW 2 */
+ 7977 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7979 "00100000" // /* MW 5 */
+ 7980 "00000000" // /* MW 4 */
+ 7981 "11110000" // /* MW 3 */
+ 7982 "00101100" // /* MW 2 */
+ 7983 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+ 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7985 "10111000" // /* MW 9 */
+ 7986 "00001000" // /* MW 8 */
+ 7987 "00000000" // /* MW 7 */
+ 7988 "00000000" // /* MW 6 */
+ 7989 "11010010" // /* MW 5 */
+ 7990 "00000010" // /* MW 4 */
+ 7991 "01010000" // /* MW 3 */
+ 7992 "11000000" // /* MW 2 */
+ 7993 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 492 38
+.src_ref 8 "superkernels.cpp" 492 38
+ 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7995 "01111000" // /* MW 9 */
+ 7996 "01001001" // /* MW 8 */
+ 7997 "00000000" // /* MW 7 */
+ 7998 "00001000" // /* MW 6 */
+ 7999 "10000000" // /* MW 5 */
+ 8000 "00000001" // /* MW 4 */
+ 8001 "10000000" // /* MW 3 */
+ 8002 "01000000" // /* MW 2 */
+ 8003 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+.src_ref 8 "superkernels.cpp" 498 15
+ 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8005 "00010000" // /* MW 9 */
+ 8006 "01101010" // /* MW 8 */
+ 8007 "10110010" // /* MW 7 */
+ 8008 "11110001" // /* MW 6 */
+ 8009 "00000001" // /* MW 5 */
+ 8010 "00000000" // /* MW 4 */
+ 8011 "00100000" // /* MW 3 */
+ 8012 "00100011" // /* MW 2 */
+ 8013 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8015 "10110000" // /* MW 5 */
+ 8016 "11001001" // /* MW 4 */
+ 8017 "11000010" // /* MW 3 */
+ 8018 "00000111" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8021 "10100000" // /* MW 5 */
+ 8022 "11001001" // /* MW 4 */
+ 8023 "11001110" // /* MW 3 */
+ 8024 "00000111" // /* MW 2 */
+ 8025 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8027 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 38
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8029 "00010111" // /* MW 3 */
+ 8030 "00011110" // /* MW 2 */
+ 8031 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8033 "10000000" // /* MW 3 */
+ 8034 "00111010" // /* MW 2 */
+ 8035 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8037 "00010110" // /* MW 3 */
+ 8038 "01000000" // /* MW 2 */
+ 8039 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8041 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8043 "00000001" // /* MW 3 */
+ 8044 "00000001" // /* MW 2 */
+ 8045 "00011100" // /* MW 1 */
+ 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8047 "00000000" // /* MW 1 */
+ 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8049 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 492 38 first
+ 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8051 "00000111" // /* MW 3 */
+ 8052 "00001011" // /* MW 2 */
+ 8053 "00000110" // /* MW 1 */
+ 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8055 "00000000" // /* MW 1 */
+ 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8057 "00000000" // /* MW 1 */
+ 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8059 "00000000" // /* MW 1 */
+ 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8061 "00000000" // /* MW 1 */
+ 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8063 "00000000" // /* MW 1 */
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 494 25 first
+ 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8067 "11010001" // /* MW 3 */
+ 8068 "00011101" // /* MW 2 */
+ 8069 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 495 24 first
+ 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8071 "11110001" // /* MW 3 */
+ 8072 "00000101" // /* MW 2 */
+ 8073 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 496 24 first
+ 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8075 "10110001" // /* MW 3 */
+ 8076 "00010101" // /* MW 2 */
+ 8077 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 15 first
+ 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8079 "00101110" // /* MW 3 */
+ 8080 "00011100" // /* MW 2 */
+ 8081 "00000010" // /* MW 1 */
+ 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8083 "00000000" // /* MW 1 */
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+ 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8087 "00000000" // /* MW 1 */
+ 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8089 "00000000" // /* MW 1 */
+ 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8091 "00000000" // /* MW 1 */
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+ 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "00101001" // /* MW 3 */
+ 8096 "00000100" // /* MW 2 */
+ 8097 "00001011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 16 first
+ 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8099 "00101110" // /* MW 3 */
+ 8100 "00000100" // /* MW 2 */
+ 8101 "00000010" // /* MW 1 */
+ 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8103 "00000000" // /* MW 1 */
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8105 "00000000" // /* MW 1 */
+ 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8107 "00000000" // /* MW 1 */
+ 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8109 "00000000" // /* MW 1 */
+ 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8111 "00000000" // /* MW 1 */
+ 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8113 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8115 "00101001" // /* MW 3 */
+ 8116 "00000100" // /* MW 2 */
+ 8117 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 15 first
+ 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8119 "00101110" // /* MW 3 */
+ 8120 "00010100" // /* MW 2 */
+ 8121 "00000010" // /* MW 1 */
+ 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8123 "00000000" // /* MW 1 */
+ 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */
+ 8125 "00000000" // /* MW 5 */
+ 8126 "00000000" // /* MW 4 */
+ 8127 "11111000" // /* MW 3 */
+ 8128 "00001111" // /* MW 2 */
+ 8129 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8131 "10111000" // /* MW 5 */
+ 8132 "11001001" // /* MW 4 */
+ 8133 "11000000" // /* MW 3 */
+ 8134 "00000111" // /* MW 2 */
+ 8135 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8141 "01100111" // /* MW 3 */
+ 8142 "00000001" // /* MW 2 */
+ 8143 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8145 "00000000" // /* MW 15 */
+ 8146 "00000000" // /* MW 14 */
+ 8147 "01111000" // /* MW 13 */
+ 8148 "10100101" // /* MW 12 */
+ 8149 "00000001" // /* MW 11 */
+ 8150 "00000000" // /* MW 10 */
+ 8151 "00000000" // /* MW 9 */
+ 8152 "10000000" // /* MW 8 */
+ 8153 "00101001" // /* MW 7 */
+ 8154 "00000100" // /* MW 6 */
+ 8155 "00100000" // /* MW 5 */
+ 8156 "00000000" // /* MW 4 */
+ 8157 "11110000" // /* MW 3 */
+ 8158 "00101100" // /* MW 2 */
+ 8159 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8161 "00010001" // /* MW 9 */
+ 8162 "01101000" // /* MW 8 */
+ 8163 "10110010" // /* MW 7 */
+ 8164 "11110011" // /* MW 6 */
+ 8165 "00000001" // /* MW 5 */
+ 8166 "00000000" // /* MW 4 */
+ 8167 "10110000" // /* MW 3 */
+ 8168 "10100011" // /* MW 2 */
+ 8169 "11111011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8171 "10000001" // /* MW 5 */
+ 8172 "00101001" // /* MW 4 */
+ 8173 "11110110" // /* MW 3 */
+ 8174 "00101100" // /* MW 2 */
+ 8175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+.src_ref 8 "superkernels.cpp" 505 7 first
+.src_ref 8 "superkernels.cpp" 505 19
+ 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8177 "00001010" // /* MW 5 */
+ 8178 "01000100" // /* MW 4 */
+ 8179 "11010000" // /* MW 3 */
+ 8180 "11000010" // /* MW 2 */
+ 8181 "11100000" // /* MW 1 */
+ 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8183 "00000000" // /* MW 1 */
+ 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8185 "00000000" // /* MW 1 */
+ 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8187 "00000000" // /* MW 1 */
+ 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8189 "00000000" // /* MW 1 */
+ 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8191 "00000000" // /* MW 1 */
+ 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8193 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 19
+ 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8195 "00001000" // /* MW 3 */
+ 8196 "01100011" // /* MW 2 */
+ 8197 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 25
+ 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */
+ 8199 "00000001" // /* MW 5 */
+ 8200 "01000000" // /* MW 4 */
+ 8201 "01011000" // /* MW 3 */
+ 8202 "00010000" // /* MW 2 */
+ 8203 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.delay_slot
+ 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8205 "00000110" // /* MW 3 */
+ 8206 "01100110" // /* MW 2 */
+ 8207 "00011110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8211 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8213 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8215 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29
+ 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8217 "10001000" // /* MW 5 */
+ 8218 "11001001" // /* MW 4 */
+ 8219 "11000100" // /* MW 3 */
+ 8220 "00000111" // /* MW 2 */
+ 8221 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29 first
+.src_ref 8 "superkernels.cpp" 505 65
+ 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8223 "00010000" // /* MW 9 */
+ 8224 "00110000" // /* MW 8 */
+ 8225 "00110010" // /* MW 7 */
+ 8226 "11110001" // /* MW 6 */
+ 8227 "00000001" // /* MW 5 */
+ 8228 "00000000" // /* MW 4 */
+ 8229 "11010000" // /* MW 3 */
+ 8230 "11000010" // /* MW 2 */
+ 8231 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 65
+ 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8233 "00111010" // /* MW 3 */
+ 8234 "00000100" // /* MW 2 */
+ 8235 "00000010" // /* MW 1 */
+ 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8237 "00000000" // /* MW 1 */
+ 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8239 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.no_stack_arguments
+ 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8241 "00000001" // /* MW 5 */
+ 8242 "00000000" // /* MW 4 */
+ 8243 "11111000" // /* MW 3 */
+ 8244 "00010011" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8247 "00000001" // /* MW 3 */
+ 8248 "00011010" // /* MW 2 */
+ 8249 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8251 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8253 "11011010" // /* MW 3 */
+ 8254 "00110110" // /* MW 2 */
+ 8255 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8257 "01000001" // /* MW 5 */
+ 8258 "10111011" // /* MW 4 */
+ 8259 "00110111" // /* MW 3 */
+ 8260 "01100000" // /* MW 2 */
+ 8261 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8263 "00010010" // /* MW 9 */
+ 8264 "00000001" // /* MW 8 */
+ 8265 "00000100" // /* MW 7 */
+ 8266 "00000000" // /* MW 6 */
+ 8267 "01011011" // /* MW 5 */
+ 8268 "00000001" // /* MW 4 */
+ 8269 "11110000" // /* MW 3 */
+ 8270 "00101100" // /* MW 2 */
+ 8271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.return_address
+ 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8273 "01000001" // /* MW 5 */
+ 8274 "10101111" // /* MW 4 */
+ 8275 "00111101" // /* MW 3 */
+ 8276 "00000110" // /* MW 2 */
+ 8277 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+ 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8279 "00000010" // /* MW 3 */
+ 8280 "11100001" // /* MW 2 */
+ 8281 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 6
+.src_ref 8 "superkernels.cpp" 505 76
+ 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */
+ 8283 "00000001" // /* MW 5 */
+ 8284 "01000000" // /* MW 4 */
+ 8285 "01010000" // /* MW 3 */
+ 8286 "00010000" // /* MW 2 */
+ 8287 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8299 "10000001" // /* MW 5 */
+ 8300 "11011001" // /* MW 4 */
+ 8301 "10100100" // /* MW 3 */
+ 8302 "00011111" // /* MW 2 */
+ 8303 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8305 "01110110" // /* MW 3 */
+ 8306 "11111111" // /* MW 2 */
+ 8307 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8309 "00110110" // /* MW 3 */
+ 8310 "11111110" // /* MW 2 */
+ 8311 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8313 "01010110" // /* MW 3 */
+ 8314 "11111110" // /* MW 2 */
+ 8315 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8317 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8319 "00110110" // /* MW 3 */
+ 8320 "01000110" // /* MW 2 */
+ 8321 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8325 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8327 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8329 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8331 "00010010" // /* MW 3 */
+ 8332 "10100011" // /* MW 2 */
+ 8333 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8335 "00110001" // /* MW 3 */
+ 8336 "00000110" // /* MW 2 */
+ 8337 "00001010" // /* MW 1 */
+ 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8339 "00000000" // /* MW 1 */
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+ 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8345 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8347 "00010000" // /* MW 5 */
+ 8348 "10100110" // /* MW 4 */
+ 8349 "11111000" // /* MW 3 */
+ 8350 "00101100" // /* MW 2 */
+ 8351 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8353 "00000000" // /* MW 1 */
+ 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8355 "00000000" // /* MW 1 */
+ 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8357 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 7 first
+ 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8359 "01111110" // /* MW 9 */
+ 8360 "10100101" // /* MW 8 */
+ 8361 "00000001" // /* MW 7 */
+ 8362 "00000000" // /* MW 6 */
+ 8363 "00010000" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11010000" // /* MW 3 */
+ 8366 "11000010" // /* MW 2 */
+ 8367 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+.src_ref 8 "superkernels.cpp" 508 19
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 8 "superkernels.cpp" 558 19
+ 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8369 "00001001" // /* MW 3 */
+ 8370 "00011100" // /* MW 2 */
+ 8371 "00010000" // /* MW 1 */
+ 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8373 "00000000" // /* MW 1 */
+ 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8375 "00000000" // /* MW 1 */
+ 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8377 "00000000" // /* MW 1 */
+ 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8379 "00000000" // /* MW 1 */
+ 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8381 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 19
+ 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8383 "00001000" // /* MW 3 */
+ 8384 "10100001" // /* MW 2 */
+ 8385 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 25
+ 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8387 "00000001" // /* MW 5 */
+ 8388 "01000000" // /* MW 4 */
+ 8389 "10110000" // /* MW 3 */
+ 8390 "00010000" // /* MW 2 */
+ 8391 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8401 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+ 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8403 "11000000" // /* MW 5 */
+ 8404 "11001001" // /* MW 4 */
+ 8405 "11000100" // /* MW 3 */
+ 8406 "00000111" // /* MW 2 */
+ 8407 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+.src_ref 8 "superkernels.cpp" 508 65
+ 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8409 "00010000" // /* MW 9 */
+ 8410 "00110000" // /* MW 8 */
+ 8411 "00110010" // /* MW 7 */
+ 8412 "11110001" // /* MW 6 */
+ 8413 "00000001" // /* MW 5 */
+ 8414 "00000000" // /* MW 4 */
+ 8415 "11010000" // /* MW 3 */
+ 8416 "11000010" // /* MW 2 */
+ 8417 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 65
+ 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8419 "00111010" // /* MW 3 */
+ 8420 "00000100" // /* MW 2 */
+ 8421 "00000010" // /* MW 1 */
+ 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8423 "00000000" // /* MW 1 */
+ 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8425 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.no_stack_arguments
+ 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8427 "00000001" // /* MW 5 */
+ 8428 "00000000" // /* MW 4 */
+ 8429 "11111000" // /* MW 3 */
+ 8430 "00010011" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8433 "00000001" // /* MW 3 */
+ 8434 "00011010" // /* MW 2 */
+ 8435 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8437 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8439 "11011010" // /* MW 3 */
+ 8440 "00110110" // /* MW 2 */
+ 8441 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8443 "01000001" // /* MW 5 */
+ 8444 "10111011" // /* MW 4 */
+ 8445 "00110111" // /* MW 3 */
+ 8446 "01100000" // /* MW 2 */
+ 8447 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "01111000" // /* MW 13 */
+ 8452 "10100101" // /* MW 12 */
+ 8453 "00000001" // /* MW 11 */
+ 8454 "10010000" // /* MW 10 */
+ 8455 "00001000" // /* MW 9 */
+ 8456 "00100000" // /* MW 8 */
+ 8457 "01011011" // /* MW 7 */
+ 8458 "00000001" // /* MW 6 */
+ 8459 "00100000" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.return_address
+ 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8465 "01000001" // /* MW 5 */
+ 8466 "10101111" // /* MW 4 */
+ 8467 "00111101" // /* MW 3 */
+ 8468 "00000110" // /* MW 2 */
+ 8469 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+ 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8471 "00000010" // /* MW 3 */
+ 8472 "11100001" // /* MW 2 */
+ 8473 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 6
+.src_ref 8 "superkernels.cpp" 508 76
+ 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8475 "00000001" // /* MW 5 */
+ 8476 "01000000" // /* MW 4 */
+ 8477 "10110000" // /* MW 3 */
+ 8478 "00010000" // /* MW 2 */
+ 8479 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8487 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8489 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8491 "10000001" // /* MW 5 */
+ 8492 "11011001" // /* MW 4 */
+ 8493 "10100100" // /* MW 3 */
+ 8494 "00011111" // /* MW 2 */
+ 8495 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8497 "01110110" // /* MW 3 */
+ 8498 "11111111" // /* MW 2 */
+ 8499 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8501 "00110110" // /* MW 3 */
+ 8502 "11111110" // /* MW 2 */
+ 8503 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "01010110" // /* MW 3 */
+ 8506 "11111110" // /* MW 2 */
+ 8507 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8511 "00110110" // /* MW 3 */
+ 8512 "01000110" // /* MW 2 */
+ 8513 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8515 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00010010" // /* MW 3 */
+ 8524 "10100011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8527 "00110001" // /* MW 3 */
+ 8528 "00000110" // /* MW 2 */
+ 8529 "00001010" // /* MW 1 */
+ 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8531 "00000000" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+ 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8537 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8539 "00010000" // /* MW 5 */
+ 8540 "10100110" // /* MW 4 */
+ 8541 "11111000" // /* MW 3 */
+ 8542 "00101100" // /* MW 2 */
+ 8543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8545 "00000000" // /* MW 1 */
+ 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8547 "00000000" // /* MW 1 */
+ 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8549 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 7 first
+.src_ref 8 "superkernels.cpp" 511 29
+ 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8551 "00010000" // /* MW 9 */
+ 8552 "01110010" // /* MW 8 */
+ 8553 "10110010" // /* MW 7 */
+ 8554 "11110011" // /* MW 6 */
+ 8555 "00000001" // /* MW 5 */
+ 8556 "00000000" // /* MW 4 */
+ 8557 "11010000" // /* MW 3 */
+ 8558 "11000010" // /* MW 2 */
+ 8559 "11100000" // /* MW 1 */
+ 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8561 "00000000" // /* MW 1 */
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+ 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8565 "00000000" // /* MW 1 */
+ 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8567 "00000000" // /* MW 1 */
+ 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8569 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8571 "00010001" // /* MW 3 */
+ 8572 "00100100" // /* MW 2 */
+ 8573 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8575 "00001000" // /* MW 3 */
+ 8576 "10100001" // /* MW 2 */
+ 8577 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 25
+ 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */
+ 8579 "00000001" // /* MW 5 */
+ 8580 "01000000" // /* MW 4 */
+ 8581 "00100000" // /* MW 3 */
+ 8582 "00010001" // /* MW 2 */
+ 8583 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+.delay_slot
+ 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8585 "11000000" // /* MW 5 */
+ 8586 "11001000" // /* MW 4 */
+ 8587 "11000100" // /* MW 3 */
+ 8588 "00000111" // /* MW 2 */
+ 8589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8591 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8593 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8595 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8597 "00000001" // /* MW 3 */
+ 8598 "00100010" // /* MW 2 */
+ 8599 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 29
+.src_ref 8 "superkernels.cpp" 511 42
+ 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8601 "00000010" // /* MW 5 */
+ 8602 "00110100" // /* MW 4 */
+ 8603 "11010000" // /* MW 3 */
+ 8604 "11000010" // /* MW 2 */
+ 8605 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+ 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8607 "00111010" // /* MW 3 */
+ 8608 "00000100" // /* MW 2 */
+ 8609 "00000010" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.no_stack_arguments
+ 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8615 "00000001" // /* MW 5 */
+ 8616 "00000000" // /* MW 4 */
+ 8617 "11111000" // /* MW 3 */
+ 8618 "00010011" // /* MW 2 */
+ 8619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8621 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8623 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8625 "00011010" // /* MW 3 */
+ 8626 "00110111" // /* MW 2 */
+ 8627 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8629 "01000001" // /* MW 5 */
+ 8630 "10111011" // /* MW 4 */
+ 8631 "00110111" // /* MW 3 */
+ 8632 "01100000" // /* MW 2 */
+ 8633 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8635 "00100100" // /* MW 5 */
+ 8636 "00000010" // /* MW 4 */
+ 8637 "11111000" // /* MW 3 */
+ 8638 "00101100" // /* MW 2 */
+ 8639 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.return_address
+ 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8641 "01000001" // /* MW 5 */
+ 8642 "10101111" // /* MW 4 */
+ 8643 "00111101" // /* MW 3 */
+ 8644 "00000110" // /* MW 2 */
+ 8645 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+ 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00000010" // /* MW 3 */
+ 8648 "11100001" // /* MW 2 */
+ 8649 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 6
+.src_ref 8 "superkernels.cpp" 511 77
+ 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */
+ 8651 "00000001" // /* MW 5 */
+ 8652 "01000000" // /* MW 4 */
+ 8653 "00010000" // /* MW 3 */
+ 8654 "00010001" // /* MW 2 */
+ 8655 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8667 "01011000" // /* MW 9 */
+ 8668 "00000001" // /* MW 8 */
+ 8669 "00001000" // /* MW 7 */
+ 8670 "11101010" // /* MW 6 */
+ 8671 "00010111" // /* MW 5 */
+ 8672 "00111111" // /* MW 4 */
+ 8673 "11010000" // /* MW 3 */
+ 8674 "11101110" // /* MW 2 */
+ 8675 "11011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8677 "01010110" // /* MW 3 */
+ 8678 "11111110" // /* MW 2 */
+ 8679 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8681 "01110110" // /* MW 3 */
+ 8682 "11111110" // /* MW 2 */
+ 8683 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8685 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8687 "01010110" // /* MW 3 */
+ 8688 "01000110" // /* MW 2 */
+ 8689 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8691 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8693 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8695 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8699 "00100010" // /* MW 3 */
+ 8700 "11100101" // /* MW 2 */
+ 8701 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "01010001" // /* MW 3 */
+ 8704 "00000110" // /* MW 2 */
+ 8705 "00001110" // /* MW 1 */
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+ 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8709 "00000000" // /* MW 1 */
+ 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8711 "00000000" // /* MW 5 */
+ 8712 "00000000" // /* MW 4 */
+ 8713 "00101000" // /* MW 3 */
+ 8714 "00010001" // /* MW 2 */
+ 8715 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8717 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+.delay_slot
+ 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8719 "00011000" // /* MW 3 */
+ 8720 "10010011" // /* MW 2 */
+ 8721 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8727 "01111110" // /* MW 9 */
+ 8728 "10100101" // /* MW 8 */
+ 8729 "00000001" // /* MW 7 */
+ 8730 "00000000" // /* MW 6 */
+ 8731 "00010000" // /* MW 5 */
+ 8732 "00000000" // /* MW 4 */
+ 8733 "11110000" // /* MW 3 */
+ 8734 "00101100" // /* MW 2 */
+ 8735 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8737 "00000000" // /* MW 5 */
+ 8738 "00000000" // /* MW 4 */
+ 8739 "00101000" // /* MW 3 */
+ 8740 "00010001" // /* MW 2 */
+ 8741 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8743 "00000101" // /* MW 3 */
+ 8744 "00100000" // /* MW 2 */
+ 8745 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8753 "00000000" // /* MW 15 */
+ 8754 "00000000" // /* MW 14 */
+ 8755 "01111000" // /* MW 13 */
+ 8756 "10100101" // /* MW 12 */
+ 8757 "00000001" // /* MW 11 */
+ 8758 "00000000" // /* MW 10 */
+ 8759 "00000000" // /* MW 9 */
+ 8760 "00000000" // /* MW 8 */
+ 8761 "01011011" // /* MW 7 */
+ 8762 "00000001" // /* MW 6 */
+ 8763 "00100000" // /* MW 5 */
+ 8764 "00000000" // /* MW 4 */
+ 8765 "11110000" // /* MW 3 */
+ 8766 "00101100" // /* MW 2 */
+ 8767 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+ 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8769 "00000000" // /* MW 15 */
+ 8770 "00000000" // /* MW 14 */
+ 8771 "01111000" // /* MW 13 */
+ 8772 "10100101" // /* MW 12 */
+ 8773 "00000001" // /* MW 11 */
+ 8774 "00101000" // /* MW 10 */
+ 8775 "00000000" // /* MW 9 */
+ 8776 "00000001" // /* MW 8 */
+ 8777 "01011011" // /* MW 7 */
+ 8778 "00000001" // /* MW 6 */
+ 8779 "00100000" // /* MW 5 */
+ 8780 "00000000" // /* MW 4 */
+ 8781 "11110000" // /* MW 3 */
+ 8782 "00101100" // /* MW 2 */
+ 8783 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+.src_ref 8 "superkernels.cpp" 516 47
+.src_ref 1 "io_buffer_main.h" 125 25
+ 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8785 "00010000" // /* MW 9 */
+ 8786 "01100110" // /* MW 8 */
+ 8787 "00110010" // /* MW 7 */
+ 8788 "11110011" // /* MW 6 */
+ 8789 "00000001" // /* MW 5 */
+ 8790 "00000000" // /* MW 4 */
+ 8791 "00100000" // /* MW 3 */
+ 8792 "01110011" // /* MW 2 */
+ 8793 "11111100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 47 first
+.src_ref 8 "superkernels.cpp" 522 6
+ 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8795 "00010000" // /* MW 9 */
+ 8796 "01101000" // /* MW 8 */
+ 8797 "00110010" // /* MW 7 */
+ 8798 "11110001" // /* MW 6 */
+ 8799 "00000001" // /* MW 5 */
+ 8800 "00000000" // /* MW 4 */
+ 8801 "11010000" // /* MW 3 */
+ 8802 "11010110" // /* MW 2 */
+ 8803 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8805 "00010000" // /* MW 9 */
+ 8806 "01100000" // /* MW 8 */
+ 8807 "00110010" // /* MW 7 */
+ 8808 "11110011" // /* MW 6 */
+ 8809 "00000001" // /* MW 5 */
+ 8810 "00000000" // /* MW 4 */
+ 8811 "11010000" // /* MW 3 */
+ 8812 "11000110" // /* MW 2 */
+ 8813 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+ 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "10010110" // /* MW 3 */
+ 8816 "00000110" // /* MW 2 */
+ 8817 "00000110" // /* MW 1 */
+ 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8819 "00000000" // /* MW 1 */
+ 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8821 "00000000" // /* MW 1 */
+ 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8823 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8825 "01110110" // /* MW 3 */
+ 8826 "00000110" // /* MW 2 */
+ 8827 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+ 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8829 "00001101" // /* MW 3 */
+ 8830 "01101011" // /* MW 2 */
+ 8831 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8833 "00000111" // /* MW 3 */
+ 8834 "01100001" // /* MW 2 */
+ 8835 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */
+ 8837 "00000001" // /* MW 5 */
+ 8838 "01000000" // /* MW 4 */
+ 8839 "00001000" // /* MW 3 */
+ 8840 "00010010" // /* MW 2 */
+ 8841 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+.delay_slot
+ 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8843 "00000111" // /* MW 3 */
+ 8844 "00101000" // /* MW 2 */
+ 8845 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.delay_slot
+ 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8847 "10010001" // /* MW 3 */
+ 8848 "00000110" // /* MW 2 */
+ 8849 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8851 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+.delay_slot
+ 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8853 "11010101" // /* MW 3 */
+ 8854 "01101001" // /* MW 2 */
+ 8855 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 12
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8857 "00100010" // /* MW 5 */
+ 8858 "01001000" // /* MW 4 */
+ 8859 "10110000" // /* MW 3 */
+ 8860 "10000011" // /* MW 2 */
+ 8861 "11110111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8863 "00100111" // /* MW 3 */
+ 8864 "01100001" // /* MW 2 */
+ 8865 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */
+ 8867 "00000001" // /* MW 5 */
+ 8868 "01000000" // /* MW 4 */
+ 8869 "11000000" // /* MW 3 */
+ 8870 "00010001" // /* MW 2 */
+ 8871 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8873 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8881 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8883 "11101000" // /* MW 3 */
+ 8884 "01100000" // /* MW 2 */
+ 8885 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */
+ 8887 "00000001" // /* MW 5 */
+ 8888 "01000000" // /* MW 4 */
+ 8889 "10101000" // /* MW 3 */
+ 8890 "00010001" // /* MW 2 */
+ 8891 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26
+.delay_slot
+ 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8893 "11000000" // /* MW 5 */
+ 8894 "11001001" // /* MW 4 */
+ 8895 "11001100" // /* MW 3 */
+ 8896 "00000111" // /* MW 2 */
+ 8897 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26 first
+.src_ref 8 "superkernels.cpp" 523 61
+ 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8907 "00010000" // /* MW 9 */
+ 8908 "00100100" // /* MW 8 */
+ 8909 "00110010" // /* MW 7 */
+ 8910 "11110011" // /* MW 6 */
+ 8911 "00000001" // /* MW 5 */
+ 8912 "00000000" // /* MW 4 */
+ 8913 "11010000" // /* MW 3 */
+ 8914 "11001010" // /* MW 2 */
+ 8915 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 61
+.src_ref 8 "superkernels.cpp" 524 44
+ 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8917 "00010000" // /* MW 9 */
+ 8918 "01101010" // /* MW 8 */
+ 8919 "00110010" // /* MW 7 */
+ 8920 "11110011" // /* MW 6 */
+ 8921 "00000001" // /* MW 5 */
+ 8922 "00000000" // /* MW 4 */
+ 8923 "11010000" // /* MW 3 */
+ 8924 "11000010" // /* MW 2 */
+ 8925 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+.src_ref 8 "superkernels.cpp" 524 44 first
+ 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8927 "00000010" // /* MW 5 */
+ 8928 "01100000" // /* MW 4 */
+ 8929 "11010000" // /* MW 3 */
+ 8930 "11000110" // /* MW 2 */
+ 8931 "11000000" // /* MW 1 */
+ 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8933 "00000000" // /* MW 1 */
+ 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8935 "00000000" // /* MW 1 */
+ 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8937 "00000000" // /* MW 1 */
+ 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8939 "00000000" // /* MW 1 */
+ 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8941 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 37 first
+ 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8943 "00001111" // /* MW 3 */
+ 8944 "10100101" // /* MW 2 */
+ 8945 "00010100" // /* MW 1 */
+ 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8947 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30 first
+.src_ref 8 "superkernels.cpp" 524 30 first
+ 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8949 "10000010" // /* MW 5 */
+ 8950 "00110010" // /* MW 4 */
+ 8951 "00111010" // /* MW 3 */
+ 8952 "11100100" // /* MW 2 */
+ 8953 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8955 "00011100" // /* MW 3 */
+ 8956 "00110111" // /* MW 2 */
+ 8957 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8959 "00000010" // /* MW 3 */
+ 8960 "11100111" // /* MW 2 */
+ 8961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 42
+ 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8963 "00011100" // /* MW 3 */
+ 8964 "10110111" // /* MW 2 */
+ 8965 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8967 "00110010" // /* MW 3 */
+ 8968 "00100011" // /* MW 2 */
+ 8969 "00010110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 65 first
+ 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8971 "00010001" // /* MW 3 */
+ 8972 "00100101" // /* MW 2 */
+ 8973 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 526 36 first
+ 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8975 "00001000" // /* MW 3 */
+ 8976 "01100001" // /* MW 2 */
+ 8977 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 8979 "00000001" // /* MW 5 */
+ 8980 "01000000" // /* MW 4 */
+ 8981 "01000000" // /* MW 3 */
+ 8982 "00010010" // /* MW 2 */
+ 8983 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32
+.delay_slot
+ 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8985 "00100000" // /* MW 5 */
+ 8986 "11001010" // /* MW 4 */
+ 8987 "11001100" // /* MW 3 */
+ 8988 "00000111" // /* MW 2 */
+ 8989 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32 first
+.delay_slot
+ 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8991 "01010001" // /* MW 3 */
+ 8992 "00000110" // /* MW 2 */
+ 8993 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8999 "00000000" // /* MW 1 */
+ 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9001 "00000000" // /* MW 5 */
+ 9002 "00000000" // /* MW 4 */
+ 9003 "11111000" // /* MW 3 */
+ 9004 "00010001" // /* MW 2 */
+ 9005 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9007 "00010000" // /* MW 9 */
+ 9008 "01101000" // /* MW 8 */
+ 9009 "10110010" // /* MW 7 */
+ 9010 "11110011" // /* MW 6 */
+ 9011 "00000001" // /* MW 5 */
+ 9012 "00000000" // /* MW 4 */
+ 9013 "00000000" // /* MW 3 */
+ 9014 "01001110" // /* MW 2 */
+ 9015 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9017 "00010000" // /* MW 9 */
+ 9018 "00100000" // /* MW 8 */
+ 9019 "00110010" // /* MW 7 */
+ 9020 "11110001" // /* MW 6 */
+ 9021 "00000001" // /* MW 5 */
+ 9022 "00000000" // /* MW 4 */
+ 9023 "00000000" // /* MW 3 */
+ 9024 "00101111" // /* MW 2 */
+ 9025 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9027 "00000001" // /* MW 3 */
+ 9028 "00011010" // /* MW 2 */
+ 9029 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9031 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9033 "00011100" // /* MW 7 */
+ 9034 "00000000" // /* MW 6 */
+ 9035 "00000000" // /* MW 5 */
+ 9036 "00000100" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9041 "00000000" // /* MW 5 */
+ 9042 "00000000" // /* MW 4 */
+ 9043 "11111000" // /* MW 3 */
+ 9044 "00010001" // /* MW 2 */
+ 9045 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9047 "00010000" // /* MW 9 */
+ 9048 "01101000" // /* MW 8 */
+ 9049 "10110010" // /* MW 7 */
+ 9050 "11110011" // /* MW 6 */
+ 9051 "00000001" // /* MW 5 */
+ 9052 "00000000" // /* MW 4 */
+ 9053 "00000000" // /* MW 3 */
+ 9054 "01001110" // /* MW 2 */
+ 9055 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9057 "00010000" // /* MW 9 */
+ 9058 "00100000" // /* MW 8 */
+ 9059 "00110010" // /* MW 7 */
+ 9060 "11110001" // /* MW 6 */
+ 9061 "00000001" // /* MW 5 */
+ 9062 "00000000" // /* MW 4 */
+ 9063 "00000000" // /* MW 3 */
+ 9064 "00101111" // /* MW 2 */
+ 9065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9067 "00000001" // /* MW 3 */
+ 9068 "00011010" // /* MW 2 */
+ 9069 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9073 "00000000" // /* MW 15 */
+ 9074 "00000000" // /* MW 14 */
+ 9075 "01111000" // /* MW 13 */
+ 9076 "10100101" // /* MW 12 */
+ 9077 "00000001" // /* MW 11 */
+ 9078 "00000000" // /* MW 10 */
+ 9079 "00000000" // /* MW 9 */
+ 9080 "00000000" // /* MW 8 */
+ 9081 "01011011" // /* MW 7 */
+ 9082 "00000001" // /* MW 6 */
+ 9083 "00100000" // /* MW 5 */
+ 9084 "00000000" // /* MW 4 */
+ 9085 "11110000" // /* MW 3 */
+ 9086 "00101100" // /* MW 2 */
+ 9087 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+.src_ref 8 "superkernels.cpp" 532 27
+.src_ref 8 "superkernels.cpp" 533 31
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+ 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9089 "00010000" // /* MW 9 */
+ 9090 "01110010" // /* MW 8 */
+ 9091 "00110010" // /* MW 7 */
+ 9092 "11110011" // /* MW 6 */
+ 9093 "00000001" // /* MW 5 */
+ 9094 "00000000" // /* MW 4 */
+ 9095 "00000000" // /* MW 3 */
+ 9096 "00001101" // /* MW 2 */
+ 9097 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 27 first
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 552 2
+ 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9099 "00010000" // /* MW 9 */
+ 9100 "00100000" // /* MW 8 */
+ 9101 "00110010" // /* MW 7 */
+ 9102 "11110001" // /* MW 6 */
+ 9103 "00000001" // /* MW 5 */
+ 9104 "00000000" // /* MW 4 */
+ 9105 "11010000" // /* MW 3 */
+ 9106 "11001010" // /* MW 2 */
+ 9107 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 533 46
+ 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9109 "00010000" // /* MW 9 */
+ 9110 "01101100" // /* MW 8 */
+ 9111 "00110010" // /* MW 7 */
+ 9112 "11110011" // /* MW 6 */
+ 9113 "00000001" // /* MW 5 */
+ 9114 "00000000" // /* MW 4 */
+ 9115 "11010000" // /* MW 3 */
+ 9116 "11000010" // /* MW 2 */
+ 9117 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 46 first
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9119 "00001010" // /* MW 5 */
+ 9120 "00111100" // /* MW 4 */
+ 9121 "11010000" // /* MW 3 */
+ 9122 "11000110" // /* MW 2 */
+ 9123 "11000000" // /* MW 1 */
+ 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9125 "00000000" // /* MW 1 */
+ 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9127 "00000000" // /* MW 1 */
+ 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9129 "00000000" // /* MW 1 */
+ 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9131 "00000000" // /* MW 1 */
+ 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9133 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 39 first
+ 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9135 "00001111" // /* MW 3 */
+ 9136 "10100101" // /* MW 2 */
+ 9137 "00010100" // /* MW 1 */
+ 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9139 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31 first
+.src_ref 8 "superkernels.cpp" 533 31 first
+ 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9141 "10000010" // /* MW 5 */
+ 9142 "00110010" // /* MW 4 */
+ 9143 "00111010" // /* MW 3 */
+ 9144 "11100100" // /* MW 2 */
+ 9145 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9147 "00011100" // /* MW 3 */
+ 9148 "00110111" // /* MW 2 */
+ 9149 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9151 "00000010" // /* MW 3 */
+ 9152 "11100111" // /* MW 2 */
+ 9153 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 44
+ 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "00011100" // /* MW 3 */
+ 9156 "10110111" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "00110010" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 67 first
+ 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00010001" // /* MW 3 */
+ 9164 "00100101" // /* MW 2 */
+ 9165 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 535 37 first
+ 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9167 "00001000" // /* MW 3 */
+ 9168 "01100001" // /* MW 2 */
+ 9169 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 9171 "00000001" // /* MW 5 */
+ 9172 "01000000" // /* MW 4 */
+ 9173 "01000000" // /* MW 3 */
+ 9174 "00010010" // /* MW 2 */
+ 9175 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33
+.delay_slot
+ 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9177 "00110000" // /* MW 5 */
+ 9178 "11001010" // /* MW 4 */
+ 9179 "11001100" // /* MW 3 */
+ 9180 "00000111" // /* MW 2 */
+ 9181 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33 first
+.delay_slot
+ 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9183 "01010001" // /* MW 3 */
+ 9184 "00000110" // /* MW 2 */
+ 9185 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9189 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9191 "00010000" // /* MW 9 */
+ 9192 "01101000" // /* MW 8 */
+ 9193 "10110010" // /* MW 7 */
+ 9194 "11110011" // /* MW 6 */
+ 9195 "00000001" // /* MW 5 */
+ 9196 "00000000" // /* MW 4 */
+ 9197 "11110000" // /* MW 3 */
+ 9198 "00101100" // /* MW 2 */
+ 9199 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 9201 "00100001" // /* MW 9 */
+ 9202 "00000000" // /* MW 8 */
+ 9203 "00000000" // /* MW 7 */
+ 9204 "10011000" // /* MW 6 */
+ 9205 "00000100" // /* MW 5 */
+ 9206 "00000000" // /* MW 4 */
+ 9207 "01100000" // /* MW 3 */
+ 9208 "10000001" // /* MW 2 */
+ 9209 "11010001" // /* MW 1 */
+.delay_slot
+ 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9211 "10010001" // /* MW 3 */
+ 9212 "11100101" // /* MW 2 */
+ 9213 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9221 "10000001" // /* MW 11 */
+ 9222 "10101101" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "00000000" // /* MW 8 */
+ 9225 "00000000" // /* MW 7 */
+ 9226 "00000000" // /* MW 6 */
+ 9227 "00100000" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+.src_ref 8 "superkernels.cpp" 541 26
+ 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10001000" // /* MW 5 */
+ 9234 "11001001" // /* MW 4 */
+ 9235 "11001100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 26 first
+.src_ref 8 "superkernels.cpp" 541 61
+ 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "00100010" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110011" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11001110" // /* MW 2 */
+ 9247 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 61
+.src_ref 8 "superkernels.cpp" 542 44
+ 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "01101110" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110011" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000010" // /* MW 2 */
+ 9257 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 44 first
+ 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9259 "01010110" // /* MW 3 */
+ 9260 "00000110" // /* MW 2 */
+ 9261 "00000110" // /* MW 1 */
+ 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9263 "00000000" // /* MW 1 */
+ 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9265 "00000000" // /* MW 1 */
+ 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9267 "00000000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 37 first
+ 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9273 "00001111" // /* MW 3 */
+ 9274 "11100111" // /* MW 2 */
+ 9275 "00010100" // /* MW 1 */
+ 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30 first
+.src_ref 8 "superkernels.cpp" 542 30 first
+ 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "10000010" // /* MW 5 */
+ 9280 "10110011" // /* MW 4 */
+ 9281 "00111010" // /* MW 3 */
+ 9282 "00100110" // /* MW 2 */
+ 9283 "10010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9285 "00101100" // /* MW 3 */
+ 9286 "01110111" // /* MW 2 */
+ 9287 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9289 "00000010" // /* MW 3 */
+ 9290 "00101001" // /* MW 2 */
+ 9291 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+.src_ref 8 "superkernels.cpp" 542 42
+ 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9293 "00000001" // /* MW 5 */
+ 9294 "10100000" // /* MW 4 */
+ 9295 "10011000" // /* MW 3 */
+ 9296 "11100101" // /* MW 2 */
+ 9297 "10011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9299 "01000010" // /* MW 3 */
+ 9300 "01100011" // /* MW 2 */
+ 9301 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 69 first
+ 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9303 "00010001" // /* MW 3 */
+ 9304 "00100101" // /* MW 2 */
+ 9305 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 544 38 first
+ 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9307 "00000111" // /* MW 3 */
+ 9308 "01100001" // /* MW 2 */
+ 9309 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */
+ 9311 "00000001" // /* MW 5 */
+ 9312 "01000000" // /* MW 4 */
+ 9313 "11100000" // /* MW 3 */
+ 9314 "00010011" // /* MW 2 */
+ 9315 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34
+.delay_slot
+ 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9317 "01000000" // /* MW 5 */
+ 9318 "11001010" // /* MW 4 */
+ 9319 "11001100" // /* MW 3 */
+ 9320 "00000111" // /* MW 2 */
+ 9321 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34 first
+.delay_slot
+ 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9323 "01010001" // /* MW 3 */
+ 9324 "00000110" // /* MW 2 */
+ 9325 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9327 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9329 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9331 "00011100" // /* MW 13 */
+ 9332 "00000000" // /* MW 12 */
+ 9333 "00000000" // /* MW 11 */
+ 9334 "01010111" // /* MW 10 */
+ 9335 "00011010" // /* MW 9 */
+ 9336 "01000000" // /* MW 8 */
+ 9337 "00000000" // /* MW 7 */
+ 9338 "00000000" // /* MW 6 */
+ 9339 "10110110" // /* MW 5 */
+ 9340 "00000010" // /* MW 4 */
+ 9341 "11110000" // /* MW 3 */
+ 9342 "00101100" // /* MW 2 */
+ 9343 "00000000" // /* MW 1 */
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9345 "01011000" // /* MW 11 */
+ 9346 "00000001" // /* MW 10 */
+ 9347 "11101000" // /* MW 9 */
+ 9348 "01001001" // /* MW 8 */
+ 9349 "11100000" // /* MW 7 */
+ 9350 "00000000" // /* MW 6 */
+ 9351 "00001011" // /* MW 5 */
+ 9352 "10001100" // /* MW 4 */
+ 9353 "00100110" // /* MW 3 */
+ 9354 "10000011" // /* MW 2 */
+ 9355 "11110111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9357 "10011001" // /* MW 3 */
+ 9358 "10111100" // /* MW 2 */
+ 9359 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9361 "10010001" // /* MW 3 */
+ 9362 "11100101" // /* MW 2 */
+ 9363 "00000111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11 first
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */
+ 9365 "00000001" // /* MW 5 */
+ 9366 "00000000" // /* MW 4 */
+ 9367 "00101000" // /* MW 3 */
+ 9368 "00001000" // /* MW 2 */
+ 9369 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9371 "11000000" // /* MW 3 */
+ 9372 "01100000" // /* MW 2 */
+ 9373 "00011111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9375 "00000001" // /* MW 3 */
+ 9376 "00011010" // /* MW 2 */
+ 9377 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11
+.delay_slot
+ 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9383 "00010000" // /* MW 9 */
+ 9384 "10000000" // /* MW 8 */
+ 9385 "00110010" // /* MW 7 */
+ 9386 "11110001" // /* MW 6 */
+ 9387 "00000001" // /* MW 5 */
+ 9388 "00000000" // /* MW 4 */
+ 9389 "11110000" // /* MW 3 */
+ 9390 "00101100" // /* MW 2 */
+ 9391 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 552 2
+.return_address
+ 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9393 "00010001" // /* MW 9 */
+ 9394 "00100000" // /* MW 8 */
+ 9395 "00110010" // /* MW 7 */
+ 9396 "11110001" // /* MW 6 */
+ 9397 "00000001" // /* MW 5 */
+ 9398 "00000000" // /* MW 4 */
+ 9399 "01100000" // /* MW 3 */
+ 9400 "10010001" // /* MW 2 */
+ 9401 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+ 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9403 "10100000" // /* MW 5 */
+ 9404 "11001001" // /* MW 4 */
+ 9405 "11001110" // /* MW 3 */
+ 9406 "00000111" // /* MW 2 */
+ 9407 "00000000" // /* MW 1 */
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9409 "10011110" // /* MW 3 */
+ 9410 "01011100" // /* MW 2 */
+ 9411 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2 first
+.no_stack_arguments
+ 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */
+ 9413 "00000001" // /* MW 5 */
+ 9414 "00000000" // /* MW 4 */
+ 9415 "01111000" // /* MW 3 */
+ 9416 "00001001" // /* MW 2 */
+ 9417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9421 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9427 "00011100" // /* MW 13 */
+ 9428 "00000000" // /* MW 12 */
+ 9429 "00000000" // /* MW 11 */
+ 9430 "01010111" // /* MW 10 */
+ 9431 "00011010" // /* MW 9 */
+ 9432 "01000000" // /* MW 8 */
+ 9433 "00000000" // /* MW 7 */
+ 9434 "00000000" // /* MW 6 */
+ 9435 "10110110" // /* MW 5 */
+ 9436 "00000010" // /* MW 4 */
+ 9437 "11110000" // /* MW 3 */
+ 9438 "00101100" // /* MW 2 */
+ 9439 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7 first
+.return_address
+ 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9441 "00010110" // /* MW 3 */
+ 9442 "00000110" // /* MW 2 */
+ 9443 "00000111" // /* MW 1 */
+ 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9445 "00000000" // /* MW 1 */
+ 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9447 "00000000" // /* MW 1 */
+ 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9449 "00000000" // /* MW 1 */
+ 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9451 "00000000" // /* MW 1 */
+ 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9453 "00000000" // /* MW 1 */
+ 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+ 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9457 "00001000" // /* MW 3 */
+ 9458 "11100011" // /* MW 2 */
+ 9459 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 25
+ 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "01000000" // /* MW 4 */
+ 9463 "11100000" // /* MW 3 */
+ 9464 "00010010" // /* MW 2 */
+ 9465 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 555 15
+ 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9477 "10001000" // /* MW 5 */
+ 9478 "11001001" // /* MW 4 */
+ 9479 "11001110" // /* MW 3 */
+ 9480 "00000111" // /* MW 2 */
+ 9481 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 67
+ 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9483 "00010000" // /* MW 9 */
+ 9484 "00110000" // /* MW 8 */
+ 9485 "00110010" // /* MW 7 */
+ 9486 "11110001" // /* MW 6 */
+ 9487 "00000001" // /* MW 5 */
+ 9488 "00000000" // /* MW 4 */
+ 9489 "11010000" // /* MW 3 */
+ 9490 "11000010" // /* MW 2 */
+ 9491 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 67
+ 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9493 "00111010" // /* MW 3 */
+ 9494 "00000100" // /* MW 2 */
+ 9495 "00000010" // /* MW 1 */
+ 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9497 "00000000" // /* MW 1 */
+ 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9499 "00000000" // /* MW 1 */
+ 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9501 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.no_stack_arguments
+ 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9503 "00000001" // /* MW 5 */
+ 9504 "00000000" // /* MW 4 */
+ 9505 "11111000" // /* MW 3 */
+ 9506 "00010011" // /* MW 2 */
+ 9507 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.delay_slot
+ 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00000111" // /* MW 3 */
+ 9512 "00100000" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9515 "10110101" // /* MW 5 */
+ 9516 "01101101" // /* MW 4 */
+ 9517 "00111000" // /* MW 3 */
+ 9518 "11000010" // /* MW 2 */
+ 9519 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9521 "01000001" // /* MW 5 */
+ 9522 "10111011" // /* MW 4 */
+ 9523 "00110111" // /* MW 3 */
+ 9524 "01100000" // /* MW 2 */
+ 9525 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9527 "00010010" // /* MW 9 */
+ 9528 "00000001" // /* MW 8 */
+ 9529 "00000100" // /* MW 7 */
+ 9530 "00000000" // /* MW 6 */
+ 9531 "01011011" // /* MW 5 */
+ 9532 "00000001" // /* MW 4 */
+ 9533 "11110000" // /* MW 3 */
+ 9534 "00101100" // /* MW 2 */
+ 9535 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9537 "01111000" // /* MW 9 */
+ 9538 "11010000" // /* MW 8 */
+ 9539 "01101011" // /* MW 7 */
+ 9540 "10001111" // /* MW 6 */
+ 9541 "00000001" // /* MW 5 */
+ 9542 "00011011" // /* MW 4 */
+ 9543 "00100000" // /* MW 3 */
+ 9544 "10100011" // /* MW 2 */
+ 9545 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+ 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9547 "00000010" // /* MW 3 */
+ 9548 "11100001" // /* MW 2 */
+ 9549 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 6
+.src_ref 8 "superkernels.cpp" 554 78
+ 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */
+ 9551 "00000001" // /* MW 5 */
+ 9552 "01000000" // /* MW 4 */
+ 9553 "11010000" // /* MW 3 */
+ 9554 "00010010" // /* MW 2 */
+ 9555 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00000101" // /* MW 3 */
+ 9558 "00011110" // /* MW 2 */
+ 9559 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9567 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 555 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9569 "01100011" // /* MW 5 */
+ 9570 "00001011" // /* MW 4 */
+ 9571 "11011110" // /* MW 3 */
+ 9572 "11000010" // /* MW 2 */
+ 9573 "01001010" // /* MW 1 */
+ 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9575 "00000000" // /* MW 1 */
+ 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9577 "00000000" // /* MW 1 */
+ 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9579 "00000000" // /* MW 1 */
+ 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9581 "00000000" // /* MW 1 */
+ 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9583 "00000000" // /* MW 1 */
+ 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9585 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9587 "11111000" // /* MW 3 */
+ 9588 "00010000" // /* MW 2 */
+ 9589 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 7
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9591 "00010000" // /* MW 9 */
+ 9592 "01101000" // /* MW 8 */
+ 9593 "10110010" // /* MW 7 */
+ 9594 "11110011" // /* MW 6 */
+ 9595 "00000001" // /* MW 5 */
+ 9596 "00000000" // /* MW 4 */
+ 9597 "11010000" // /* MW 3 */
+ 9598 "11000010" // /* MW 2 */
+ 9599 "11011100" // /* MW 1 */
+ 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9601 "00000000" // /* MW 1 */
+ 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9603 "00000000" // /* MW 1 */
+ 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */
+ 9605 "00000000" // /* MW 5 */
+ 9606 "00000000" // /* MW 4 */
+ 9607 "11011000" // /* MW 3 */
+ 9608 "00010010" // /* MW 2 */
+ 9609 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9617 "00000001" // /* MW 3 */
+ 9618 "11100001" // /* MW 2 */
+ 9619 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.delay_slot
+ 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9621 "11000001" // /* MW 11 */
+ 9622 "00001000" // /* MW 10 */
+ 9623 "01110011" // /* MW 9 */
+ 9624 "00000011" // /* MW 8 */
+ 9625 "00000000" // /* MW 7 */
+ 9626 "00000000" // /* MW 6 */
+ 9627 "00100000" // /* MW 5 */
+ 9628 "00000000" // /* MW 4 */
+ 9629 "11110000" // /* MW 3 */
+ 9630 "00101100" // /* MW 2 */
+ 9631 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+.src_ref 8 "superkernels.cpp" 558 7
+ 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9633 "00000000" // /* MW 15 */
+ 9634 "00000000" // /* MW 14 */
+ 9635 "00010000" // /* MW 13 */
+ 9636 "01101000" // /* MW 12 */
+ 9637 "10110010" // /* MW 11 */
+ 9638 "11110011" // /* MW 10 */
+ 9639 "00000001" // /* MW 9 */
+ 9640 "00000000" // /* MW 8 */
+ 9641 "01011011" // /* MW 7 */
+ 9642 "00000001" // /* MW 6 */
+ 9643 "00100000" // /* MW 5 */
+ 9644 "00000000" // /* MW 4 */
+ 9645 "11110000" // /* MW 3 */
+ 9646 "00101100" // /* MW 2 */
+ 9647 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+.src_ref 8 "superkernels.cpp" 558 7 first
+ 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9649 "00000000" // /* MW 15 */
+ 9650 "00000000" // /* MW 14 */
+ 9651 "01111000" // /* MW 13 */
+ 9652 "10100101" // /* MW 12 */
+ 9653 "00000001" // /* MW 11 */
+ 9654 "00000000" // /* MW 10 */
+ 9655 "00000000" // /* MW 9 */
+ 9656 "00000000" // /* MW 8 */
+ 9657 "01011011" // /* MW 7 */
+ 9658 "00000001" // /* MW 6 */
+ 9659 "00100000" // /* MW 5 */
+ 9660 "00000000" // /* MW 4 */
+ 9661 "11010000" // /* MW 3 */
+ 9662 "11000010" // /* MW 2 */
+ 9663 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+.src_ref 8 "superkernels.cpp" 558 43
+ 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00000001" // /* MW 3 */
+ 9666 "00100010" // /* MW 2 */
+ 9667 "00010000" // /* MW 1 */
+ 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9669 "00000000" // /* MW 1 */
+ 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9671 "00000000" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 19
+ 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9679 "00001000" // /* MW 3 */
+ 9680 "10100001" // /* MW 2 */
+ 9681 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 25
+ 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */
+ 9683 "00000001" // /* MW 5 */
+ 9684 "01000000" // /* MW 4 */
+ 9685 "01001000" // /* MW 3 */
+ 9686 "00010011" // /* MW 2 */
+ 9687 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 559 15
+.delay_slot
+ 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9689 "11000000" // /* MW 5 */
+ 9690 "11001001" // /* MW 4 */
+ 9691 "11001110" // /* MW 3 */
+ 9692 "00000111" // /* MW 2 */
+ 9693 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+.delay_slot
+ 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9695 "11000000" // /* MW 5 */
+ 9696 "11001000" // /* MW 4 */
+ 9697 "11000100" // /* MW 3 */
+ 9698 "00000111" // /* MW 2 */
+ 9699 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9701 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9703 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9705 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+ 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00010110" // /* MW 3 */
+ 9708 "00000110" // /* MW 2 */
+ 9709 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+ 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "00111010" // /* MW 3 */
+ 9712 "00000100" // /* MW 2 */
+ 9713 "00000010" // /* MW 1 */
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9715 "00000000" // /* MW 1 */
+ 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9717 "00000000" // /* MW 1 */
+ 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9719 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.no_stack_arguments
+ 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9721 "00000001" // /* MW 5 */
+ 9722 "00000000" // /* MW 4 */
+ 9723 "11111000" // /* MW 3 */
+ 9724 "00010011" // /* MW 2 */
+ 9725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9727 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.delay_slot
+ 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9729 "00000111" // /* MW 3 */
+ 9730 "00100000" // /* MW 2 */
+ 9731 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9733 "00110101" // /* MW 5 */
+ 9734 "01101110" // /* MW 4 */
+ 9735 "00111000" // /* MW 3 */
+ 9736 "11000010" // /* MW 2 */
+ 9737 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9739 "01000001" // /* MW 5 */
+ 9740 "00111011" // /* MW 4 */
+ 9741 "00110111" // /* MW 3 */
+ 9742 "01100000" // /* MW 2 */
+ 9743 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9745 "00000000" // /* MW 15 */
+ 9746 "00000000" // /* MW 14 */
+ 9747 "01111000" // /* MW 13 */
+ 9748 "10100101" // /* MW 12 */
+ 9749 "00000001" // /* MW 11 */
+ 9750 "10010000" // /* MW 10 */
+ 9751 "00001000" // /* MW 9 */
+ 9752 "00100000" // /* MW 8 */
+ 9753 "01011011" // /* MW 7 */
+ 9754 "00000001" // /* MW 6 */
+ 9755 "00100000" // /* MW 5 */
+ 9756 "00000000" // /* MW 4 */
+ 9757 "11110000" // /* MW 3 */
+ 9758 "00101100" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9761 "01111000" // /* MW 9 */
+ 9762 "10010000" // /* MW 8 */
+ 9763 "01101011" // /* MW 7 */
+ 9764 "10001111" // /* MW 6 */
+ 9765 "00000001" // /* MW 5 */
+ 9766 "00011011" // /* MW 4 */
+ 9767 "00100000" // /* MW 3 */
+ 9768 "10010011" // /* MW 2 */
+ 9769 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+ 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9771 "00000010" // /* MW 3 */
+ 9772 "11100001" // /* MW 2 */
+ 9773 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 6
+.src_ref 8 "superkernels.cpp" 558 78
+ 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */
+ 9775 "00000001" // /* MW 5 */
+ 9776 "01000000" // /* MW 4 */
+ 9777 "00111000" // /* MW 3 */
+ 9778 "00010011" // /* MW 2 */
+ 9779 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 7
+.delay_slot
+ 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9781 "10100000" // /* MW 5 */
+ 9782 "11001001" // /* MW 4 */
+ 9783 "11000100" // /* MW 3 */
+ 9784 "00000111" // /* MW 2 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9789 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9791 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9793 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 559 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9795 "01100011" // /* MW 5 */
+ 9796 "00001011" // /* MW 4 */
+ 9797 "11011110" // /* MW 3 */
+ 9798 "11000010" // /* MW 2 */
+ 9799 "00101010" // /* MW 1 */
+ 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9801 "00000000" // /* MW 1 */
+ 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9803 "00000000" // /* MW 1 */
+ 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9805 "00000000" // /* MW 1 */
+ 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9807 "00000000" // /* MW 1 */
+ 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9809 "00000000" // /* MW 1 */
+ 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "11111000" // /* MW 3 */
+ 9814 "00010000" // /* MW 2 */
+ 9815 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "00010110" // /* MW 3 */
+ 9818 "11100110" // /* MW 2 */
+ 9819 "00000110" // /* MW 1 */
+ 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9821 "00000000" // /* MW 1 */
+ 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9823 "00000000" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00000001" // /* MW 3 */
+ 9834 "11100001" // /* MW 2 */
+ 9835 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9837 "00010001" // /* MW 3 */
+ 9838 "11100110" // /* MW 2 */
+ 9839 "00001110" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */
+ 9841 "00000000" // /* MW 5 */
+ 9842 "00000000" // /* MW 4 */
+ 9843 "01010000" // /* MW 3 */
+ 9844 "00010011" // /* MW 2 */
+ 9845 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9847 "11000000" // /* MW 3 */
+ 9848 "01100010" // /* MW 2 */
+ 9849 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9857 "00000000" // /* MW 15 */
+ 9858 "00000000" // /* MW 14 */
+ 9859 "01111000" // /* MW 13 */
+ 9860 "10100101" // /* MW 12 */
+ 9861 "00000001" // /* MW 11 */
+ 9862 "00000000" // /* MW 10 */
+ 9863 "00000000" // /* MW 9 */
+ 9864 "00000000" // /* MW 8 */
+ 9865 "01011011" // /* MW 7 */
+ 9866 "00000001" // /* MW 6 */
+ 9867 "00100000" // /* MW 5 */
+ 9868 "00000000" // /* MW 4 */
+ 9869 "11110000" // /* MW 3 */
+ 9870 "00101100" // /* MW 2 */
+ 9871 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+.src_ref 8 "superkernels.cpp" 562 7
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9873 "00000000" // /* MW 15 */
+ 9874 "00000000" // /* MW 14 */
+ 9875 "00010000" // /* MW 13 */
+ 9876 "01101000" // /* MW 12 */
+ 9877 "00110010" // /* MW 11 */
+ 9878 "11110001" // /* MW 10 */
+ 9879 "00000001" // /* MW 9 */
+ 9880 "00000000" // /* MW 8 */
+ 9881 "01011011" // /* MW 7 */
+ 9882 "00000001" // /* MW 6 */
+ 9883 "00100000" // /* MW 5 */
+ 9884 "00000000" // /* MW 4 */
+ 9885 "00100000" // /* MW 3 */
+ 9886 "11110011" // /* MW 2 */
+ 9887 "11111011" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+.src_ref 8 "superkernels.cpp" 562 7 first
+.src_ref 8 "superkernels.cpp" 562 19
+ 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00100010" // /* MW 5 */
+ 9890 "01000100" // /* MW 4 */
+ 9891 "11010000" // /* MW 3 */
+ 9892 "11000010" // /* MW 2 */
+ 9893 "01000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 19
+ 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9907 "00001000" // /* MW 3 */
+ 9908 "01100001" // /* MW 2 */
+ 9909 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 25
+ 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9911 "00000001" // /* MW 5 */
+ 9912 "01000000" // /* MW 4 */
+ 9913 "10101000" // /* MW 3 */
+ 9914 "00010011" // /* MW 2 */
+ 9915 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11001000" // /* MW 5 */
+ 9918 "11001001" // /* MW 4 */
+ 9919 "11000100" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9929 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 68
+ 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9931 "00010000" // /* MW 9 */
+ 9932 "00110000" // /* MW 8 */
+ 9933 "10110010" // /* MW 7 */
+ 9934 "11110000" // /* MW 6 */
+ 9935 "00000001" // /* MW 5 */
+ 9936 "00000000" // /* MW 4 */
+ 9937 "11010000" // /* MW 3 */
+ 9938 "11000010" // /* MW 2 */
+ 9939 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 68
+ 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9941 "00111010" // /* MW 3 */
+ 9942 "00000100" // /* MW 2 */
+ 9943 "00000001" // /* MW 1 */
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+ 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9947 "00000000" // /* MW 1 */
+ 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9949 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.no_stack_arguments
+ 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9951 "00000001" // /* MW 5 */
+ 9952 "00000000" // /* MW 4 */
+ 9953 "11111000" // /* MW 3 */
+ 9954 "00010011" // /* MW 2 */
+ 9955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9959 "00000111" // /* MW 3 */
+ 9960 "00100000" // /* MW 2 */
+ 9961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9963 "10110101" // /* MW 5 */
+ 9964 "01101101" // /* MW 4 */
+ 9965 "00111000" // /* MW 3 */
+ 9966 "11000010" // /* MW 2 */
+ 9967 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9969 "01000001" // /* MW 5 */
+ 9970 "00111011" // /* MW 4 */
+ 9971 "00110111" // /* MW 3 */
+ 9972 "01100000" // /* MW 2 */
+ 9973 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9975 "00010010" // /* MW 9 */
+ 9976 "00000001" // /* MW 8 */
+ 9977 "00000100" // /* MW 7 */
+ 9978 "00000000" // /* MW 6 */
+ 9979 "01011011" // /* MW 5 */
+ 9980 "00000001" // /* MW 4 */
+ 9981 "11110000" // /* MW 3 */
+ 9982 "00101100" // /* MW 2 */
+ 9983 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.return_address
+ 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9985 "01000001" // /* MW 5 */
+ 9986 "10101110" // /* MW 4 */
+ 9987 "00111101" // /* MW 3 */
+ 9988 "00000110" // /* MW 2 */
+ 9989 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+ 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9991 "00000010" // /* MW 3 */
+ 9992 "11100001" // /* MW 2 */
+ 9993 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 6
+.src_ref 8 "superkernels.cpp" 562 79
+ 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9995 "00000001" // /* MW 5 */
+ 9996 "01000000" // /* MW 4 */
+ 9997 "10101000" // /* MW 3 */
+ 9998 "00010011" // /* MW 2 */
+ 9999 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16
+.delay_slot
+ 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10001 "11001000" // /* MW 5 */
+ 10002 "11001001" // /* MW 4 */
+ 10003 "11000100" // /* MW 3 */
+ 10004 "00000111" // /* MW 2 */
+ 10005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10013 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "01100011" // /* MW 5 */
+ 10016 "00001011" // /* MW 4 */
+ 10017 "11010100" // /* MW 3 */
+ 10018 "11000010" // /* MW 2 */
+ 10019 "11101010" // /* MW 1 */
+ 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10021 "00000000" // /* MW 1 */
+ 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10023 "00000000" // /* MW 1 */
+ 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10025 "00000000" // /* MW 1 */
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10033 "11111000" // /* MW 3 */
+ 10034 "00010000" // /* MW 2 */
+ 10035 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10037 "00010110" // /* MW 3 */
+ 10038 "11100110" // /* MW 2 */
+ 10039 "00000110" // /* MW 1 */
+ 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10041 "00000000" // /* MW 1 */
+ 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10043 "00000000" // /* MW 1 */
+ 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10045 "00000000" // /* MW 1 */
+ 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10047 "00000000" // /* MW 1 */
+ 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10049 "00000000" // /* MW 1 */
+ 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10053 "00000001" // /* MW 3 */
+ 10054 "11100001" // /* MW 2 */
+ 10055 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10057 "01110000" // /* MW 7 */
+ 10058 "10100101" // /* MW 6 */
+ 10059 "00000001" // /* MW 5 */
+ 10060 "00000000" // /* MW 4 */
+ 10061 "00110000" // /* MW 3 */
+ 10062 "11000010" // /* MW 2 */
+ 10063 "11011100" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+.src_ref 8 "superkernels.cpp" 566 6
+.src_ref 8 "superkernels.cpp" 567 14
+ 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10065 "10000000" // /* MW 5 */
+ 10066 "11001001" // /* MW 4 */
+ 10067 "11001100" // /* MW 3 */
+ 10068 "00000111" // /* MW 2 */
+ 10069 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6 first
+.src_ref 8 "superkernels.cpp" 566 19
+ 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10071 "00010000" // /* MW 9 */
+ 10072 "01110100" // /* MW 8 */
+ 10073 "00110010" // /* MW 7 */
+ 10074 "11110001" // /* MW 6 */
+ 10075 "00000001" // /* MW 5 */
+ 10076 "00000000" // /* MW 4 */
+ 10077 "11010000" // /* MW 3 */
+ 10078 "11000010" // /* MW 2 */
+ 10079 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 19
+ 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10081 "00110110" // /* MW 3 */
+ 10082 "00000110" // /* MW 2 */
+ 10083 "00000010" // /* MW 1 */
+ 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10085 "00000000" // /* MW 1 */
+ 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10087 "00000000" // /* MW 1 */
+ 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10089 "00000000" // /* MW 1 */
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+ 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10093 "00000000" // /* MW 1 */
+ 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10095 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 16
+ 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10097 "00001000" // /* MW 3 */
+ 10098 "01100001" // /* MW 2 */
+ 10099 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6
+ 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */
+ 10101 "00000001" // /* MW 5 */
+ 10102 "01000000" // /* MW 4 */
+ 10103 "11001000" // /* MW 3 */
+ 10104 "00010011" // /* MW 2 */
+ 10105 "10000000" // /* MW 1 */
+.delay_slot
+ 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10107 "10011001" // /* MW 3 */
+ 10108 "11101111" // /* MW 2 */
+ 10109 "00000111" // /* MW 1 */
+.delay_slot
+ 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10111 "11110001" // /* MW 3 */
+ 10112 "11110001" // /* MW 2 */
+ 10113 "00000111" // /* MW 1 */
+.delay_slot
+ 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10115 "11010001" // /* MW 3 */
+ 10116 "11110101" // /* MW 2 */
+ 10117 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 567 14 first
+ 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100011" // /* MW 5 */
+ 10124 "00001011" // /* MW 4 */
+ 10125 "11111100" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10129 "01000001" // /* MW 5 */
+ 10130 "11101011" // /* MW 4 */
+ 10131 "00101110" // /* MW 3 */
+ 10132 "00101110" // /* MW 2 */
+ 10133 "11111111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10135 "10010001" // /* MW 3 */
+ 10136 "11111101" // /* MW 2 */
+ 10137 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10139 "10110001" // /* MW 3 */
+ 10140 "11101001" // /* MW 2 */
+ 10141 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10143 "00000000" // /* MW 3 */
+ 10144 "00101000" // /* MW 2 */
+ 10145 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10147 "00100000" // /* MW 3 */
+ 10148 "01100110" // /* MW 2 */
+ 10149 "00011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569
+.delay_slot
+ 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10151 "00000001" // /* MW 5 */
+ 10152 "00000000" // /* MW 4 */
+ 10153 "00000000" // /* MW 3 */
+ 10154 "11110000" // /* MW 2 */
+ 10155 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10161 "00000000" // /* MW 15 */
+ 10162 "00000000" // /* MW 14 */
+ 10163 "01111000" // /* MW 13 */
+ 10164 "10100101" // /* MW 12 */
+ 10165 "00000001" // /* MW 11 */
+ 10166 "00000000" // /* MW 10 */
+ 10167 "00000000" // /* MW 9 */
+ 10168 "00000000" // /* MW 8 */
+ 10169 "01011011" // /* MW 7 */
+ 10170 "00000001" // /* MW 6 */
+ 10171 "00100000" // /* MW 5 */
+ 10172 "00000000" // /* MW 4 */
+ 10173 "11110000" // /* MW 3 */
+ 10174 "00101100" // /* MW 2 */
+ 10175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 10177 "00100000" // /* MW 11 */
+ 10178 "00000000" // /* MW 10 */
+ 10179 "00000000" // /* MW 9 */
+ 10180 "10011000" // /* MW 8 */
+ 10181 "00000100" // /* MW 7 */
+ 10182 "00000000" // /* MW 6 */
+ 10183 "00001011" // /* MW 5 */
+ 10184 "10001100" // /* MW 4 */
+ 10185 "00000110" // /* MW 3 */
+ 10186 "00001101" // /* MW 2 */
+ 10187 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10189 "00001001" // /* MW 5 */
+ 10190 "00100000" // /* MW 4 */
+ 10191 "10100111" // /* MW 3 */
+ 10192 "11000000" // /* MW 2 */
+ 10193 "00000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+ 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10195 "10000000" // /* MW 5 */
+ 10196 "11001000" // /* MW 4 */
+ 10197 "11000100" // /* MW 3 */
+ 10198 "00000111" // /* MW 2 */
+ 10199 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10201 "10100000" // /* MW 5 */
+ 10202 "11001001" // /* MW 4 */
+ 10203 "11001110" // /* MW 3 */
+ 10204 "00000111" // /* MW 2 */
+ 10205 "00000000" // /* MW 1 */
+.delay_slot
+ 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10207 "10010001" // /* MW 3 */
+ 10208 "11100101" // /* MW 2 */
+ 10209 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 115 4 first
+.function_start
+ 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10225 "01000001" // /* MW 5 */
+ 10226 "10100000" // /* MW 4 */
+ 10227 "00101111" // /* MW 3 */
+ 10228 "11000000" // /* MW 2 */
+ 10229 "00000000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10231 "00011100" // /* MW 3 */
+ 10232 "11000110" // /* MW 2 */
+ 10233 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10235 "00011100" // /* MW 3 */
+ 10236 "11000110" // /* MW 2 */
+ 10237 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10239 "00011100" // /* MW 3 */
+ 10240 "11000110" // /* MW 2 */
+ 10241 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10243 "00011100" // /* MW 3 */
+ 10244 "11000110" // /* MW 2 */
+ 10245 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10247 "00011100" // /* MW 3 */
+ 10248 "11000110" // /* MW 2 */
+ 10249 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10251 "00011100" // /* MW 3 */
+ 10252 "11000110" // /* MW 2 */
+ 10253 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10255 "00011100" // /* MW 3 */
+ 10256 "11000110" // /* MW 2 */
+ 10257 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10259 "00011100" // /* MW 3 */
+ 10260 "11000110" // /* MW 2 */
+ 10261 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10263 "00011100" // /* MW 3 */
+ 10264 "11000110" // /* MW 2 */
+ 10265 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10267 "00011100" // /* MW 3 */
+ 10268 "11000110" // /* MW 2 */
+ 10269 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10271 "00011100" // /* MW 3 */
+ 10272 "11000110" // /* MW 2 */
+ 10273 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10275 "00011100" // /* MW 3 */
+ 10276 "11000110" // /* MW 2 */
+ 10277 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10279 "00011100" // /* MW 3 */
+ 10280 "11000110" // /* MW 2 */
+ 10281 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10283 "00011100" // /* MW 3 */
+ 10284 "11000110" // /* MW 2 */
+ 10285 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10287 "00011100" // /* MW 3 */
+ 10288 "11000110" // /* MW 2 */
+ 10289 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10291 "00011100" // /* MW 3 */
+ 10292 "11000110" // /* MW 2 */
+ 10293 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10295 "00011100" // /* MW 3 */
+ 10296 "11000110" // /* MW 2 */
+ 10297 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10299 "00011100" // /* MW 3 */
+ 10300 "11000110" // /* MW 2 */
+ 10301 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10303 "00011100" // /* MW 3 */
+ 10304 "11000110" // /* MW 2 */
+ 10305 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10307 "00011100" // /* MW 3 */
+ 10308 "11000110" // /* MW 2 */
+ 10309 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "00011100" // /* MW 3 */
+ 10312 "11000110" // /* MW 2 */
+ 10313 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10315 "00011100" // /* MW 3 */
+ 10316 "11000110" // /* MW 2 */
+ 10317 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10319 "00011100" // /* MW 3 */
+ 10320 "11000110" // /* MW 2 */
+ 10321 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10323 "00011100" // /* MW 3 */
+ 10324 "11000110" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "00011100" // /* MW 3 */
+ 10328 "11000110" // /* MW 2 */
+ 10329 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "00011100" // /* MW 3 */
+ 10332 "11000110" // /* MW 2 */
+ 10333 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10335 "00011100" // /* MW 3 */
+ 10336 "11000110" // /* MW 2 */
+ 10337 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10339 "00011100" // /* MW 3 */
+ 10340 "11000110" // /* MW 2 */
+ 10341 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 119 first
+ 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10343 "00000000" // /* MW 3 */
+ 10344 "00101000" // /* MW 2 */
+ 10345 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19 first
+.delay_slot
+ 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10347 "00011100" // /* MW 3 */
+ 10348 "11000110" // /* MW 2 */
+ 10349 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10351 "00011100" // /* MW 3 */
+ 10352 "11000110" // /* MW 2 */
+ 10353 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10355 "00011100" // /* MW 3 */
+ 10356 "11000110" // /* MW 2 */
+ 10357 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10359 "00011100" // /* MW 3 */
+ 10360 "11000110" // /* MW 2 */
+ 10361 "00010000" // /* MW 1 */
+.delay_slot
+ 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10363 "10100000" // /* MW 3 */
+ 10364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 10365 "00011000" // /* MW 1 */
+.label _ZL19propagateFloat32NaNjj
+.function propagateFloat32NaN _ZL19propagateFloat32NaNjj
+.src_ref 10 "softfloat-specialize" 78 24
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 143 4 first
+.function_start
+ 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10369 "00010000" // /* MW 9 */
+ 10370 "00000000" // /* MW 8 */
+ 10371 "01001000" // /* MW 7 */
+ 10372 "00000010" // /* MW 6 */
+ 10373 "11000000" // /* MW 5 */
+ 10374 "00111111" // /* MW 4 */
+ 10375 "00000000" // /* MW 3 */
+ 10376 "01000011" // /* MW 2 */
+ 10377 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6
+.src_ref 10 "softfloat-specialize" 141 6
+ 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10379 "00010000" // /* MW 9 */
+ 10380 "00000000" // /* MW 8 */
+ 10381 "00001000" // /* MW 7 */
+ 10382 "00000000" // /* MW 6 */
+ 10383 "00010000" // /* MW 5 */
+ 10384 "00000000" // /* MW 4 */
+ 10385 "00000000" // /* MW 3 */
+ 10386 "11100111" // /* MW 2 */
+ 10387 "00111111" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6 first
+ 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10389 "01011000" // /* MW 9 */
+ 10390 "11111110" // /* MW 8 */
+ 10391 "10101001" // /* MW 7 */
+ 10392 "00101100" // /* MW 6 */
+ 10393 "01000000" // /* MW 5 */
+ 10394 "00000010" // /* MW 4 */
+ 10395 "00000000" // /* MW 3 */
+ 10396 "00110000" // /* MW 2 */
+ 10397 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 141 6 first
+ 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10399 "00000101" // /* MW 3 */
+ 10400 "10000000" // /* MW 2 */
+ 10401 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10403 "00111101" // /* MW 3 */
+ 10404 "01001100" // /* MW 2 */
+ 10405 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10407 "00111101" // /* MW 3 */
+ 10408 "10000110" // /* MW 2 */
+ 10409 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10411 "00110100" // /* MW 3 */
+ 10412 "11000110" // /* MW 2 */
+ 10413 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10415 "01100100" // /* MW 3 */
+ 10416 "11001100" // /* MW 2 */
+ 10417 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10419 "01100111" // /* MW 3 */
+ 10420 "01001100" // /* MW 2 */
+ 10421 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38 first
+ 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10423 "00001101" // /* MW 3 */
+ 10424 "10100011" // /* MW 2 */
+ 10425 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 24
+ 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10427 "00011100" // /* MW 3 */
+ 10428 "10110111" // /* MW 2 */
+ 10429 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 62 first
+ 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10431 "00000010" // /* MW 3 */
+ 10432 "00100010" // /* MW 2 */
+ 10433 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+ 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10435 "11111110" // /* MW 5 */
+ 10436 "00111111" // /* MW 4 */
+ 10437 "11111000" // /* MW 3 */
+ 10438 "00111111" // /* MW 2 */
+ 10439 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10441 "00000100" // /* MW 3 */
+ 10442 "10000101" // /* MW 2 */
+ 10443 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10445 "11110000" // /* MW 3 */
+ 10446 "10000100" // /* MW 2 */
+ 10447 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10449 "00000100" // /* MW 3 */
+ 10450 "01000011" // /* MW 2 */
+ 10451 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10453 "11110000" // /* MW 3 */
+ 10454 "01000010" // /* MW 2 */
+ 10455 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 4 first
+ 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10457 "00000000" // /* MW 3 */
+ 10458 "00101000" // /* MW 2 */
+ 10459 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+.delay_slot
+ 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10461 "01100100" // /* MW 3 */
+ 10462 "01110110" // /* MW 2 */
+ 10463 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10465 "01010111" // /* MW 3 */
+ 10466 "11000010" // /* MW 2 */
+ 10467 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 49 first
+.delay_slot
+ 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10469 "01000010" // /* MW 3 */
+ 10470 "01000110" // /* MW 2 */
+ 10471 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10473 "00100100" // /* MW 3 */
+ 10474 "01110110" // /* MW 2 */
+ 10475 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 27 first
+.delay_slot
+ 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10477 "00000010" // /* MW 3 */
+ 10478 "11000000" // /* MW 2 */
+.label _ZL19propagateFloat32NaNjj__end
+ 10479 "00010000" // /* MW 1 */
+.label _ZL19roundAndPackFloat32iij
+.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 154 first
+.src_ref 10 "softfloat.c" 161 19
+.src_ref 10 "softfloat.c" 203 30
+.function_start
+ 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10481 "00010000" // /* MW 9 */
+ 10482 "01111010" // /* MW 8 */
+ 10483 "00110010" // /* MW 7 */
+ 10484 "11110000" // /* MW 6 */
+ 10485 "00000001" // /* MW 5 */
+ 10486 "00000000" // /* MW 4 */
+ 10487 "00000000" // /* MW 3 */
+ 10488 "00000000" // /* MW 2 */
+ 10489 "00001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 161 19 first
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 178 21
+.src_ref 10 "softfloat.c" 194 29
+ 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10491 "11111010" // /* MW 5 */
+ 10492 "10011001" // /* MW 4 */
+ 10493 "11010000" // /* MW 3 */
+ 10494 "10010010" // /* MW 2 */
+ 10495 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10501 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10503 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10505 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10507 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 162 36 first
+.src_ref 10 "softfloat.c" 164 4 first
+ 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */
+ 10509 "00000001" // /* MW 5 */
+ 10510 "00000000" // /* MW 4 */
+ 10511 "10101000" // /* MW 3 */
+ 10512 "00010100" // /* MW 2 */
+ 10513 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 40
+.src_ref 10 "softfloat.c" 185 68
+.src_ref 10 "softfloat.c" 202 18
+.delay_slot
+ 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10515 "00000001" // /* MW 3 */
+ 10516 "01001010" // /* MW 2 */
+ 10517 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10525 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 171 34
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 174 34
+ 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10527 "01011000" // /* MW 9 */
+ 10528 "00000000" // /* MW 8 */
+ 10529 "00001000" // /* MW 7 */
+ 10530 "01001011" // /* MW 6 */
+ 10531 "01110000" // /* MW 5 */
+ 10532 "00000000" // /* MW 4 */
+ 10533 "00000000" // /* MW 3 */
+ 10534 "01110000" // /* MW 2 */
+ 10535 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26
+.src_ref 10 "softfloat.c" 171 34 first
+ 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10537 "00000101" // /* MW 5 */
+ 10538 "10100000" // /* MW 4 */
+ 10539 "11110010" // /* MW 3 */
+ 10540 "11001000" // /* MW 2 */
+ 10541 "00111110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 171 16
+ 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10543 "10000010" // /* MW 3 */
+ 10544 "10001111" // /* MW 2 */
+ 10545 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 174 34 first
+ 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10547 "00000111" // /* MW 3 */
+ 10548 "00110111" // /* MW 2 */
+ 10549 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12
+.src_ref 10 "softfloat.c" 174 16
+ 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10551 "01000001" // /* MW 5 */
+ 10552 "10100001" // /* MW 4 */
+ 10553 "01001101" // /* MW 3 */
+ 10554 "00110000" // /* MW 2 */
+ 10555 "00110100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12 first
+.src_ref 10 "softfloat.c" 170 12 first
+ 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10557 "01110010" // /* MW 3 */
+ 10558 "00001110" // /* MW 2 */
+ 10559 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26 first
+ 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10561 "01000111" // /* MW 3 */
+ 10562 "01110110" // /* MW 2 */
+ 10563 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+ 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10565 "10000001" // /* MW 11 */
+ 10566 "10101101" // /* MW 10 */
+ 10567 "00000000" // /* MW 9 */
+ 10568 "00010000" // /* MW 8 */
+ 10569 "01011100" // /* MW 7 */
+ 10570 "00001110" // /* MW 6 */
+ 10571 "00100000" // /* MW 5 */
+ 10572 "00000000" // /* MW 4 */
+ 10573 "11110000" // /* MW 3 */
+ 10574 "00101100" // /* MW 2 */
+ 10575 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+.src_ref 10 "softfloat.c" 179 14
+.src_ref 10 "softfloat.c" 179 17 first
+.src_ref 10 "softfloat.c" 180 23
+.src_ref 10 "softfloat.c" 181 28
+ 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10577 "11110101" // /* MW 5 */
+ 10578 "00100011" // /* MW 4 */
+ 10579 "00001000" // /* MW 3 */
+ 10580 "10010110" // /* MW 2 */
+ 10581 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 14
+ 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10583 "00001010" // /* MW 3 */
+ 10584 "10100101" // /* MW 2 */
+ 10585 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 4
+ 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */
+ 10587 "00000001" // /* MW 5 */
+ 10588 "01000000" // /* MW 4 */
+ 10589 "00001000" // /* MW 3 */
+ 10590 "00010101" // /* MW 2 */
+ 10591 "10010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 178 21 first
+.delay_slot
+ 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10593 "01100100" // /* MW 3 */
+ 10594 "11100010" // /* MW 2 */
+ 10595 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.delay_slot
+ 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10597 "01111101" // /* MW 3 */
+ 10598 "00001110" // /* MW 2 */
+ 10599 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10601 "01111101" // /* MW 3 */
+ 10602 "01000010" // /* MW 2 */
+ 10603 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10607 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 28 first
+.src_ref 10 "softfloat.c" 182 40 first
+.src_ref 10 "softfloat.c" 182 59
+ 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10609 "10101000" // /* MW 9 */
+ 10610 "11001010" // /* MW 8 */
+ 10611 "10001000" // /* MW 7 */
+ 10612 "00111110" // /* MW 6 */
+ 10613 "00111000" // /* MW 5 */
+ 10614 "00000101" // /* MW 4 */
+ 10615 "00000000" // /* MW 3 */
+ 10616 "00010010" // /* MW 2 */
+ 10617 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 59
+ 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10619 "00101010" // /* MW 3 */
+ 10620 "00101001" // /* MW 2 */
+ 10621 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 23 first
+ 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10623 "00101010" // /* MW 3 */
+ 10624 "00100000" // /* MW 2 */
+ 10625 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 18 first
+ 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10627 "01000100" // /* MW 3 */
+ 10628 "11100111" // /* MW 2 */
+ 10629 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 13 first
+ 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10631 "00000101" // /* MW 3 */
+ 10632 "11100111" // /* MW 2 */
+ 10633 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 8 first
+ 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */
+ 10635 "00000001" // /* MW 5 */
+ 10636 "01000000" // /* MW 4 */
+ 10637 "00110000" // /* MW 3 */
+ 10638 "00010101" // /* MW 2 */
+ 10639 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 187 18
+.src_ref 10 "softfloat.c" 192 39
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10641 "00000001" // /* MW 3 */
+ 10642 "00100000" // /* MW 2 */
+ 10643 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10651 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 18 first
+ 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10653 "00001001" // /* MW 3 */
+ 10654 "10100111" // /* MW 2 */
+ 10655 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 8
+ 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */
+ 10657 "00000001" // /* MW 5 */
+ 10658 "01000000" // /* MW 4 */
+ 10659 "00010000" // /* MW 3 */
+ 10660 "00010101" // /* MW 2 */
+ 10661 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10671 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 192 39 first
+ 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10673 "00100001" // /* MW 3 */
+ 10674 "00000100" // /* MW 2 */
+ 10675 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */
+ 10677 "00000001" // /* MW 5 */
+ 10678 "00000000" // /* MW 4 */
+ 10679 "11111000" // /* MW 3 */
+ 10680 "00010100" // /* MW 2 */
+ 10681 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10683 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10685 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10687 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10691 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10693 "10000001" // /* MW 5 */
+ 10694 "10100000" // /* MW 4 */
+ 10695 "00111001" // /* MW 3 */
+ 10696 "01000100" // /* MW 2 */
+ 10697 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10699 "00010100" // /* MW 3 */
+ 10700 "11001111" // /* MW 2 */
+ 10701 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10703 "01111101" // /* MW 3 */
+ 10704 "11001110" // /* MW 2 */
+ 10705 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10707 "00011101" // /* MW 3 */
+ 10708 "11100011" // /* MW 2 */
+ 10709 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10711 "00111010" // /* MW 3 */
+ 10712 "10110111" // /* MW 2 */
+ 10713 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10715 "11110000" // /* MW 3 */
+ 10716 "11001110" // /* MW 2 */
+ 10717 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10719 "11110000" // /* MW 3 */
+ 10720 "11000110" // /* MW 2 */
+ 10721 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10723 "00010101" // /* MW 3 */
+ 10724 "11000101" // /* MW 2 */
+ 10725 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10727 "00100010" // /* MW 9 */
+ 10728 "11000110" // /* MW 8 */
+ 10729 "00000000" // /* MW 7 */
+ 10730 "00000000" // /* MW 6 */
+ 10731 "01011011" // /* MW 5 */
+ 10732 "00000001" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */
+ 10737 "00000000" // /* MW 5 */
+ 10738 "00000000" // /* MW 4 */
+ 10739 "00010000" // /* MW 3 */
+ 10740 "00010101" // /* MW 2 */
+ 10741 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 194 29 first
+.delay_slot
+ 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10743 "01100100" // /* MW 3 */
+ 10744 "11100010" // /* MW 2 */
+ 10745 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10747 "00000001" // /* MW 3 */
+ 10748 "00000100" // /* MW 2 */
+ 10749 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10755 "00011100" // /* MW 13 */
+ 10756 "00000000" // /* MW 12 */
+ 10757 "00000000" // /* MW 11 */
+ 10758 "01010111" // /* MW 10 */
+ 10759 "00011010" // /* MW 9 */
+ 10760 "01000000" // /* MW 8 */
+ 10761 "00000000" // /* MW 7 */
+ 10762 "00000000" // /* MW 6 */
+ 10763 "10110110" // /* MW 5 */
+ 10764 "00000010" // /* MW 4 */
+ 10765 "11110000" // /* MW 3 */
+ 10766 "00101100" // /* MW 2 */
+ 10767 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+ 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10769 "00000000" // /* MW 15 */
+ 10770 "00000000" // /* MW 14 */
+ 10771 "01111000" // /* MW 13 */
+ 10772 "10100101" // /* MW 12 */
+ 10773 "00000001" // /* MW 11 */
+ 10774 "00001000" // /* MW 10 */
+ 10775 "00000000" // /* MW 9 */
+ 10776 "00000001" // /* MW 8 */
+ 10777 "01011011" // /* MW 7 */
+ 10778 "00000001" // /* MW 6 */
+ 10779 "00100000" // /* MW 5 */
+ 10780 "00000000" // /* MW 4 */
+ 10781 "11110000" // /* MW 3 */
+ 10782 "00101100" // /* MW 2 */
+ 10783 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+.src_ref 10 "softfloat.c" 202 18 first
+.src_ref 10 "softfloat.c" 202 36
+.src_ref 10 "softfloat.c" 203 30 first
+ 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10785 "10101000" // /* MW 9 */
+ 10786 "11001010" // /* MW 8 */
+ 10787 "10101000" // /* MW 7 */
+ 10788 "00110100" // /* MW 6 */
+ 10789 "00110000" // /* MW 5 */
+ 10790 "00100010" // /* MW 4 */
+ 10791 "00000000" // /* MW 3 */
+ 10792 "00100000" // /* MW 2 */
+ 10793 "11111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59
+.src_ref 10 "softfloat.c" 203 12
+.src_ref 10 "softfloat.c" 203 46
+ 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10795 "01011000" // /* MW 9 */
+ 10796 "11111111" // /* MW 8 */
+ 10797 "10001111" // /* MW 7 */
+ 10798 "00101100" // /* MW 6 */
+ 10799 "01100010" // /* MW 5 */
+ 10800 "00000110" // /* MW 4 */
+ 10801 "00000000" // /* MW 3 */
+ 10802 "11100011" // /* MW 2 */
+ 10803 "00000010" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 46
+ 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10805 "11010000" // /* MW 3 */
+ 10806 "10001100" // /* MW 2 */
+ 10807 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 202 36
+ 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00001101" // /* MW 3 */
+ 10810 "01000000" // /* MW 2 */
+ 10811 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 12
+ 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10813 "01000110" // /* MW 3 */
+ 10814 "10001000" // /* MW 2 */
+ 10815 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 205 4 first
+ 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10817 "00000000" // /* MW 3 */
+ 10818 "00101000" // /* MW 2 */
+ 10819 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 9 first
+.delay_slot
+ 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10821 "00000100" // /* MW 3 */
+ 10822 "00110110" // /* MW 2 */
+ 10823 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4 first
+.src_ref 10 "softfloat.c" 204 14 first
+.delay_slot
+ 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10825 "00100010" // /* MW 3 */
+ 10826 "00000100" // /* MW 2 */
+ 10827 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59 first
+.delay_slot
+ 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10829 "00111101" // /* MW 3 */
+ 10830 "10000100" // /* MW 2 */
+ 10831 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10833 "00100000" // /* MW 3 */
+ 10834 "01000100" // /* MW 2 */
+ 10835 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66
+.delay_slot
+ 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10837 "10000001" // /* MW 11 */
+ 10838 "10101101" // /* MW 10 */
+ 10839 "00000000" // /* MW 9 */
+ 10840 "00000100" // /* MW 8 */
+ 10841 "00000001" // /* MW 7 */
+ 10842 "00110110" // /* MW 6 */
+ 10843 "00100000" // /* MW 5 */
+ 10844 "00000000" // /* MW 4 */
+ 10845 "11110000" // /* MW 3 */
+ 10846 "00101100" // /* MW 2 */
+ 10847 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+.src_ref 10 "softfloat.c" 185 12 first
+ 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10849 "00000000" // /* MW 3 */
+ 10850 "00101000" // /* MW 2 */
+ 10851 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10853 "00000000" // /* MW 5 */
+ 10854 "00100000" // /* MW 4 */
+ 10855 "00000001" // /* MW 3 */
+ 10856 "10000000" // /* MW 2 */
+ 10857 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10859 "00100000" // /* MW 3 */
+ 10860 "01000110" // /* MW 2 */
+ 10861 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 68 first
+.delay_slot
+ 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10863 "11010000" // /* MW 3 */
+ 10864 "01000100" // /* MW 2 */
+ 10865 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 49
+.delay_slot
+ 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10867 "00100001" // /* MW 3 */
+ 10868 "11000000" // /* MW 2 */
+ 10869 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19roundAndPackFloat32iij__end
+ 10871 "00000000" // /* MW 1 */
+.label _ZL28normalizeRoundAndPackFloat32iij
+.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 218 first
+.src_ref 10 "softfloat.c" 224 11 first
+.tail_call
+.function_start
+ 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10881 "00000000" // /* MW 5 */
+ 10882 "00000000" // /* MW 4 */
+ 10883 "01111000" // /* MW 3 */
+ 10884 "00010100" // /* MW 2 */
+ 10885 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 552 53 first
+.delay_slot
+ 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10887 "00110000" // /* MW 3 */
+ 10888 "11100000" // /* MW 2 */
+ 10889 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 223 45 first
+.delay_slot
+ 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10891 "11111111" // /* MW 3 */
+ 10892 "00100001" // /* MW 2 */
+ 10893 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 44 first
+.delay_slot
+ 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10895 "00000001" // /* MW 3 */
+ 10896 "10000101" // /* MW 2 */
+ 10897 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 62
+.delay_slot
+ 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10899 "00001101" // /* MW 3 */
+ 10900 "11000111" // /* MW 2 */
+ 10901 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+ 10903 "00000000" // /* MW 1 */
+.label int32_to_float32
+.function int32_to_float32 int32_to_float32
+.src_ref 10 "softfloat.c" 477 first
+.src_ref 10 "softfloat.c" 481 4
+.src_ref 10 "softfloat.c" 481 11 first
+.function_start
+ 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */
+ 10913 "00000001" // /* MW 5 */
+ 10914 "00000000" // /* MW 4 */
+ 10915 "01111000" // /* MW 3 */
+ 10916 "00010101" // /* MW 2 */
+ 10917 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10919 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10927 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11
+ 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10929 "00000000" // /* MW 5 */
+ 10930 "00100000" // /* MW 4 */
+ 10931 "00001000" // /* MW 3 */
+ 10932 "00000000" // /* MW 2 */
+ 10933 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11 first
+ 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10935 "00000111" // /* MW 3 */
+ 10936 "01100001" // /* MW 2 */
+ 10937 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 4
+ 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */
+ 10939 "00000001" // /* MW 5 */
+ 10940 "01000000" // /* MW 4 */
+ 10941 "10000000" // /* MW 3 */
+ 10942 "00010101" // /* MW 2 */
+ 10943 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10953 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 11
+.src_ref 10 "softfloat.c" 484 11 first
+.tail_call
+ 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 10955 "00100000" // /* MW 9 */
+ 10956 "00000000" // /* MW 8 */
+ 10957 "00000000" // /* MW 7 */
+ 10958 "01010000" // /* MW 6 */
+ 10959 "00000101" // /* MW 5 */
+ 10960 "00000000" // /* MW 4 */
+ 10961 "00000000" // /* MW 3 */
+ 10962 "10000010" // /* MW 2 */
+ 10963 "00010011" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 60
+.src_ref 10 "softfloat.c" 484 62
+.delay_slot
+ 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00010000" // /* MW 3 */
+ 10966 "01000111" // /* MW 2 */
+ 10967 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16
+.delay_slot
+ 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10969 "00000001" // /* MW 3 */
+ 10970 "00100000" // /* MW 2 */
+ 10971 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16 first
+.delay_slot
+ 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10973 "00001010" // /* MW 3 */
+ 10974 "01000011" // /* MW 2 */
+ 10975 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10979 "00011100" // /* MW 13 */
+ 10980 "00000000" // /* MW 12 */
+ 10981 "00000000" // /* MW 11 */
+ 10982 "01010111" // /* MW 10 */
+ 10983 "00011010" // /* MW 9 */
+ 10984 "01000000" // /* MW 8 */
+ 10985 "00000000" // /* MW 7 */
+ 10986 "00000000" // /* MW 6 */
+ 10987 "10110110" // /* MW 5 */
+ 10988 "00000010" // /* MW 4 */
+ 10989 "11110000" // /* MW 3 */
+ 10990 "00101100" // /* MW 2 */
+ 10991 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_80
+.src_ref 10 "softfloat.c" 481 18 first
+.return_address
+ 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10993 "00000000" // /* MW 3 */
+ 10994 "00101000" // /* MW 2 */
+ 10995 "00010000" // /* MW 1 */
+.delay_slot
+ 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10997 "00000001" // /* MW 3 */
+ 10998 "00000000" // /* MW 2 */
+ 10999 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11001 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11003 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11007 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_96
+.src_ref 10 "softfloat.c" 482 37 first
+ 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000000" // /* MW 3 */
+ 11010 "00101000" // /* MW 2 */
+ 11011 "00010000" // /* MW 1 */
+.delay_slot
+ 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11013 "00000000" // /* MW 5 */
+ 11014 "00100000" // /* MW 4 */
+ 11015 "00000000" // /* MW 3 */
+ 11016 "00000000" // /* MW 2 */
+ 11017 "11001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11019 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label int32_to_float32__end
+ 11025 "00000000" // /* MW 1 */
+.label _ZL14addFloat32Sigsjji
+.function addFloat32Sigs _ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 734 first
+.function_start
+ 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11041 "10010000" // /* MW 9 */
+ 11042 "11111111" // /* MW 8 */
+ 11043 "00001111" // /* MW 7 */
+ 11044 "11111110" // /* MW 6 */
+ 11045 "00011111" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00110010" // /* MW 2 */
+ 11049 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00101101" // /* MW 3 */
+ 11052 "01100011" // /* MW 2 */
+ 11053 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11055 "00101101" // /* MW 3 */
+ 11056 "10001001" // /* MW 2 */
+ 11057 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11059 "10010000" // /* MW 3 */
+ 11060 "01110110" // /* MW 2 */
+ 11061 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "10010000" // /* MW 3 */
+ 11064 "00110010" // /* MW 2 */
+ 11065 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 744 19 first
+.src_ref 10 "softfloat.c" 747 11
+.src_ref 10 "softfloat.c" 761 22
+.src_ref 10 "softfloat.c" 772 35
+.src_ref 10 "softfloat.c" 788 24
+ 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11067 "00000001" // /* MW 5 */
+ 11068 "00100000" // /* MW 4 */
+ 11069 "00111100" // /* MW 3 */
+ 11070 "01110010" // /* MW 2 */
+ 11071 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 11 first
+ 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00011010" // /* MW 3 */
+ 11074 "00001001" // /* MW 2 */
+ 11075 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 4
+ 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */
+ 11077 "00000001" // /* MW 5 */
+ 11078 "01000000" // /* MW 4 */
+ 11079 "11111000" // /* MW 3 */
+ 11080 "00010101" // /* MW 2 */
+ 11081 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.delay_slot
+ 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00000100" // /* MW 3 */
+ 11084 "01100111" // /* MW 2 */
+ 11085 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 745 9
+.src_ref 10 "softfloat.c" 746 9
+.delay_slot
+ 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11087 "00011001" // /* MW 5 */
+ 11088 "00100000" // /* MW 4 */
+ 11089 "10010000" // /* MW 3 */
+ 11090 "00100000" // /* MW 2 */
+ 11091 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 745 9 first
+.delay_slot
+ 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "00001101" // /* MW 3 */
+ 11094 "11100110" // /* MW 2 */
+ 11095 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 746 9 first
+.src_ref 10 "softfloat.c" 748 18
+.src_ref 10 "softfloat.c" 762 18
+.delay_slot
+ 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11097 "11111101" // /* MW 5 */
+ 11098 "00100011" // /* MW 4 */
+ 11099 "10111010" // /* MW 3 */
+ 11100 "00000001" // /* MW 2 */
+ 11101 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.src_ref 10 "softfloat.c" 748 18 first
+.delay_slot
+ 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11103 "01111101" // /* MW 5 */
+ 11104 "00100000" // /* MW 4 */
+ 11105 "11111001" // /* MW 3 */
+ 11106 "00101000" // /* MW 2 */
+ 11107 "11011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 22 first
+ 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11109 "10001001" // /* MW 3 */
+ 11110 "01001011" // /* MW 2 */
+ 11111 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 9
+ 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */
+ 11113 "00000001" // /* MW 5 */
+ 11114 "01000000" // /* MW 4 */
+ 11115 "01011000" // /* MW 3 */
+ 11116 "00010110" // /* MW 2 */
+ 11117 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "00101101" // /* MW 3 */
+ 11120 "11001001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11129 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 18 first
+ 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11131 "01000111" // /* MW 3 */
+ 11132 "01101001" // /* MW 2 */
+ 11133 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 8
+ 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */
+ 11135 "00000001" // /* MW 5 */
+ 11136 "01000000" // /* MW 4 */
+ 11137 "01000000" // /* MW 3 */
+ 11138 "00010110" // /* MW 2 */
+ 11139 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10100000" // /* MW 3 */
+ 11152 "01010001" // /* MW 2 */
+ 11153 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 787 4
+ 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11155 "10100000" // /* MW 3 */
+ 11156 "10011100" // /* MW 2 */
+ 11157 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 767 12 first
+ 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11159 "00000111" // /* MW 3 */
+ 11160 "01000000" // /* MW 2 */
+ 11161 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+ 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11163 "00010010" // /* MW 3 */
+ 11164 "00100011" // /* MW 2 */
+ 11165 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 772 35 first
+ 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11167 "00010001" // /* MW 3 */
+ 11168 "00100011" // /* MW 2 */
+ 11169 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11171 "00000001" // /* MW 5 */
+ 11172 "00000000" // /* MW 4 */
+ 11173 "00101000" // /* MW 3 */
+ 11174 "00010110" // /* MW 2 */
+ 11175 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11177 "00000000" // /* MW 5 */
+ 11178 "00100000" // /* MW 4 */
+ 11179 "00001010" // /* MW 3 */
+ 11180 "00000000" // /* MW 2 */
+ 11181 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17 first
+.delay_slot
+ 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11183 "01000101" // /* MW 3 */
+ 11184 "11000111" // /* MW 2 */
+ 11185 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+.delay_slot
+ 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11187 "00110010" // /* MW 3 */
+ 11188 "11100110" // /* MW 2 */
+ 11189 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11193 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11195 "10000001" // /* MW 5 */
+ 11196 "00100000" // /* MW 4 */
+ 11197 "00110000" // /* MW 3 */
+ 11198 "11100010" // /* MW 2 */
+ 11199 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11201 "00100100" // /* MW 3 */
+ 11202 "11100101" // /* MW 2 */
+ 11203 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11205 "00101101" // /* MW 3 */
+ 11206 "11100101" // /* MW 2 */
+ 11207 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11209 "00001010" // /* MW 3 */
+ 11210 "01110110" // /* MW 2 */
+ 11211 "00010100" // /* MW 1 */
+ 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */
+ 11213 "00000000" // /* MW 5 */
+ 11214 "00000000" // /* MW 4 */
+ 11215 "00101000" // /* MW 3 */
+ 11216 "00010110" // /* MW 2 */
+ 11217 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+.delay_slot
+ 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11219 "00111101" // /* MW 3 */
+ 11220 "11000110" // /* MW 2 */
+ 11221 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+.delay_slot
+ 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11223 "11110000" // /* MW 3 */
+ 11224 "10100100" // /* MW 2 */
+ 11225 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+.delay_slot
+ 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11227 "11110000" // /* MW 3 */
+ 11228 "11100010" // /* MW 2 */
+ 11229 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+.delay_slot
+ 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11231 "00100101" // /* MW 3 */
+ 11232 "11100101" // /* MW 2 */
+ 11233 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+.delay_slot
+ 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11235 "01100000" // /* MW 13 */
+ 11236 "00101011" // /* MW 12 */
+ 11237 "00000000" // /* MW 11 */
+ 11238 "10101111" // /* MW 10 */
+ 11239 "00110100" // /* MW 9 */
+ 11240 "00000000" // /* MW 8 */
+ 11241 "00100010" // /* MW 7 */
+ 11242 "01100111" // /* MW 6 */
+ 11243 "00100100" // /* MW 5 */
+ 11244 "00000000" // /* MW 4 */
+ 11245 "11110000" // /* MW 3 */
+ 11246 "00101100" // /* MW 2 */
+ 11247 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_208
+.src_ref 10 "softfloat.c" 748 8 first
+ 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */
+ 11249 "00000001" // /* MW 5 */
+ 11250 "01000000" // /* MW 4 */
+ 11251 "01111000" // /* MW 3 */
+ 11252 "00010110" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11255 "00000000" // /* MW 5 */
+ 11256 "00100000" // /* MW 4 */
+ 11257 "00001010" // /* MW 3 */
+ 11258 "00000000" // /* MW 2 */
+ 11259 "00100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11263 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11265 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11267 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11269 "10100000" // /* MW 3 */
+ 11270 "01010001" // /* MW 2 */
+ 11271 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 753 12 first
+.src_ref 10 "softfloat.c" 787 4
+ 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11273 "01000001" // /* MW 5 */
+ 11274 "00111011" // /* MW 4 */
+ 11275 "11100001" // /* MW 3 */
+ 11276 "11111111" // /* MW 2 */
+ 11277 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8
+.src_ref 10 "softfloat.c" 752 18
+ 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "10100000" // /* MW 3 */
+ 11280 "11011100" // /* MW 2 */
+ 11281 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+ 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010010" // /* MW 3 */
+ 11284 "11100011" // /* MW 2 */
+ 11285 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11287 "00000001" // /* MW 5 */
+ 11288 "00000000" // /* MW 4 */
+ 11289 "00101000" // /* MW 3 */
+ 11290 "00010110" // /* MW 2 */
+ 11291 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17 first
+.delay_slot
+ 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11293 "00000101" // /* MW 3 */
+ 11294 "00000001" // /* MW 2 */
+ 11295 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+.delay_slot
+ 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00000010" // /* MW 3 */
+ 11298 "00100000" // /* MW 2 */
+ 11299 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11301 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11303 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11305 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11307 "10000001" // /* MW 5 */
+ 11308 "00100000" // /* MW 4 */
+ 11309 "00110000" // /* MW 3 */
+ 11310 "11100010" // /* MW 2 */
+ 11311 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11313 "00100100" // /* MW 3 */
+ 11314 "11100101" // /* MW 2 */
+ 11315 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11317 "00101101" // /* MW 3 */
+ 11318 "00100101" // /* MW 2 */
+ 11319 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11321 "00111101" // /* MW 3 */
+ 11322 "00000110" // /* MW 2 */
+ 11323 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11325 "00001010" // /* MW 3 */
+ 11326 "01110110" // /* MW 2 */
+ 11327 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11329 "11110000" // /* MW 3 */
+ 11330 "10100100" // /* MW 2 */
+ 11331 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11333 "11110000" // /* MW 3 */
+ 11334 "00100000" // /* MW 2 */
+ 11335 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11337 "00100101" // /* MW 3 */
+ 11338 "11100011" // /* MW 2 */
+ 11339 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11341 "00010010" // /* MW 3 */
+ 11342 "00100001" // /* MW 2 */
+ 11343 "00010100" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_304
+.src_ref 10 "softfloat.c" 785 9 first
+.src_ref 10 "softfloat.c" 786 26
+.src_ref 10 "softfloat.c" 787 4 first
+ 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11345 "11001000" // /* MW 9 */
+ 11346 "10111111" // /* MW 8 */
+ 11347 "00101000" // /* MW 7 */
+ 11348 "00101110" // /* MW 6 */
+ 11349 "00111010" // /* MW 5 */
+ 11350 "00100111" // /* MW 4 */
+ 11351 "00000000" // /* MW 3 */
+ 11352 "00110010" // /* MW 2 */
+ 11353 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 18 first
+.src_ref 10 "softfloat.c" 790 8 first
+ 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11355 "00000001" // /* MW 5 */
+ 11356 "00110001" // /* MW 4 */
+ 11357 "00011000" // /* MW 3 */
+ 11358 "11100000" // /* MW 2 */
+ 11359 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 26
+ 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11361 "00101101" // /* MW 3 */
+ 11362 "11100101" // /* MW 2 */
+ 11363 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 24 first
+ 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11365 "10001010" // /* MW 3 */
+ 11366 "10110111" // /* MW 2 */
+ 11367 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11369 "00000010" // /* MW 3 */
+ 11370 "01000101" // /* MW 2 */
+ 11371 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11373 "00110010" // /* MW 3 */
+ 11374 "10000111" // /* MW 2 */
+ 11375 "00010100" // /* MW 1 */
+.label __ll1__ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 793 11 first
+.tail_call
+ 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 11377 "00000000" // /* MW 5 */
+ 11378 "00000000" // /* MW 4 */
+ 11379 "01111000" // /* MW 3 */
+ 11380 "00010100" // /* MW 2 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11391 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.src_ref 10 "softfloat.c" 763 12 first
+.return_address
+ 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */
+ 11393 "00000001" // /* MW 5 */
+ 11394 "01000000" // /* MW 4 */
+ 11395 "10001000" // /* MW 3 */
+ 11396 "00010110" // /* MW 2 */
+ 11397 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11407 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 764 12 first
+ 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11409 "00000000" // /* MW 3 */
+ 11410 "00101000" // /* MW 2 */
+ 11411 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11413 "00000000" // /* MW 5 */
+ 11414 "00100000" // /* MW 4 */
+ 11415 "00001000" // /* MW 3 */
+ 11416 "10000000" // /* MW 2 */
+ 11417 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11419 "00000000" // /* MW 3 */
+ 11420 "00000001" // /* MW 2 */
+ 11421 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11427 "00011100" // /* MW 13 */
+ 11428 "00000000" // /* MW 12 */
+ 11429 "00000000" // /* MW 11 */
+ 11430 "01010111" // /* MW 10 */
+ 11431 "00011010" // /* MW 9 */
+ 11432 "01000000" // /* MW 8 */
+ 11433 "00000000" // /* MW 7 */
+ 11434 "00000000" // /* MW 6 */
+ 11435 "10110110" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "11110000" // /* MW 3 */
+ 11438 "00101100" // /* MW 2 */
+ 11439 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 776 8 first
+ 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */
+ 11441 "00000001" // /* MW 5 */
+ 11442 "01000000" // /* MW 4 */
+ 11443 "10010000" // /* MW 3 */
+ 11444 "00010110" // /* MW 2 */
+ 11445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11455 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 8 first
+ 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */
+ 11457 "00000001" // /* MW 5 */
+ 11458 "00000000" // /* MW 4 */
+ 11459 "10101000" // /* MW 3 */
+ 11460 "00010110" // /* MW 2 */
+ 11461 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11471 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11473 "10100000" // /* MW 3 */
+ 11474 "01010001" // /* MW 2 */
+ 11475 "00011000" // /* MW 1 */
+ 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */
+ 11477 "00000000" // /* MW 5 */
+ 11478 "00000000" // /* MW 4 */
+ 11479 "00111000" // /* MW 3 */
+ 11480 "00010110" // /* MW 2 */
+ 11481 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26
+.delay_slot
+ 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11483 "00000000" // /* MW 5 */
+ 11484 "10100000" // /* MW 4 */
+ 11485 "00001000" // /* MW 3 */
+ 11486 "00000000" // /* MW 2 */
+ 11487 "01000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26 first
+.src_ref 10 "softfloat.c" 793 11
+.delay_slot
+ 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "01000001" // /* MW 5 */
+ 11490 "00111011" // /* MW 4 */
+ 11491 "00010001" // /* MW 3 */
+ 11492 "01100010" // /* MW 2 */
+ 11493 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 33
+.delay_slot
+ 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11495 "00000000" // /* MW 3 */
+ 11496 "01000111" // /* MW 2 */
+ 11497 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11501 "01100111" // /* MW 3 */
+ 11502 "00000001" // /* MW 2 */
+ 11503 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_464
+.src_ref 10 "softfloat.c" 749 12 first
+ 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */
+ 11505 "00000001" // /* MW 5 */
+ 11506 "01000000" // /* MW 4 */
+ 11507 "10111000" // /* MW 3 */
+ 11508 "00010110" // /* MW 2 */
+ 11509 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11519 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 750 12 first
+ 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11521 "00000000" // /* MW 3 */
+ 11522 "00101000" // /* MW 2 */
+ 11523 "00010000" // /* MW 1 */
+.delay_slot
+ 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "10100000" // /* MW 3 */
+ 11526 "00010000" // /* MW 2 */
+ 11527 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11535 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.src_ref 10 "softfloat.c" 763 31 first
+.tail_call
+ 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11537 "00000000" // /* MW 5 */
+ 11538 "00000000" // /* MW 4 */
+ 11539 "01000000" // /* MW 3 */
+ 11540 "00010100" // /* MW 2 */
+ 11541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11551 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 777 22 first
+.return_address
+ 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11553 "00000101" // /* MW 3 */
+ 11554 "11100001" // /* MW 2 */
+ 11555 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 777 12
+ 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */
+ 11557 "00000001" // /* MW 5 */
+ 11558 "01000000" // /* MW 4 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "00010110" // /* MW 2 */
+ 11561 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11567 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11571 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 778 12 first
+ 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11573 "00000000" // /* MW 3 */
+ 11574 "00101000" // /* MW 2 */
+ 11575 "00010000" // /* MW 1 */
+.delay_slot
+ 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11577 "10100000" // /* MW 3 */
+ 11578 "00010000" // /* MW 2 */
+ 11579 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11583 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11585 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11587 "00011100" // /* MW 13 */
+ 11588 "00000000" // /* MW 12 */
+ 11589 "00000000" // /* MW 11 */
+ 11590 "01010111" // /* MW 10 */
+ 11591 "00011010" // /* MW 9 */
+ 11592 "01000000" // /* MW 8 */
+ 11593 "00000000" // /* MW 7 */
+ 11594 "00000000" // /* MW 6 */
+ 11595 "10110110" // /* MW 5 */
+ 11596 "00000010" // /* MW 4 */
+ 11597 "11110000" // /* MW 3 */
+ 11598 "00101100" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 780 25 first
+.src_ref 10 "softfloat.c" 780 62 first
+ 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11601 "10000010" // /* MW 5 */
+ 11602 "00110011" // /* MW 4 */
+ 11603 "00001000" // /* MW 3 */
+ 11604 "00000000" // /* MW 2 */
+ 11605 "00000101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11607 "11101001" // /* MW 3 */
+ 11608 "11100010" // /* MW 2 */
+ 11609 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11611 "00011101" // /* MW 3 */
+ 11612 "00100001" // /* MW 2 */
+ 11613 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66 first
+.delay_slot
+ 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11615 "00000000" // /* MW 3 */
+ 11616 "00000001" // /* MW 2 */
+ 11617 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 11621 "10000001" // /* MW 11 */
+ 11622 "10101101" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "00000000" // /* MW 7 */
+ 11626 "00000000" // /* MW 6 */
+ 11627 "00100000" // /* MW 5 */
+ 11628 "00000000" // /* MW 4 */
+ 11629 "11110000" // /* MW 3 */
+ 11630 "00101100" // /* MW 2 */
+ 11631 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 749 31 first
+.tail_call
+ 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11633 "00000000" // /* MW 5 */
+ 11634 "00000000" // /* MW 4 */
+ 11635 "01000000" // /* MW 3 */
+ 11636 "00010100" // /* MW 2 */
+ 11637 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11639 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.src_ref 10 "softfloat.c" 777 38 first
+.tail_call
+.return_address
+ 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11649 "00000000" // /* MW 5 */
+ 11650 "00000000" // /* MW 4 */
+ 11651 "01000000" // /* MW 3 */
+ 11652 "00010100" // /* MW 2 */
+ 11653 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14addFloat32Sigsjji__end
+ 11663 "00000000" // /* MW 1 */
+.label _ZL14subFloat32Sigsjji
+.function subFloat32Sigs _ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 805 first
+.function_start
+ 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11665 "10010000" // /* MW 9 */
+ 11666 "11111111" // /* MW 8 */
+ 11667 "00001111" // /* MW 7 */
+ 11668 "11111110" // /* MW 6 */
+ 11669 "00011111" // /* MW 5 */
+ 11670 "00000000" // /* MW 4 */
+ 11671 "00000000" // /* MW 3 */
+ 11672 "00110001" // /* MW 2 */
+ 11673 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11675 "00011101" // /* MW 3 */
+ 11676 "10001001" // /* MW 2 */
+ 11677 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11679 "00011101" // /* MW 3 */
+ 11680 "01100101" // /* MW 2 */
+ 11681 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+ 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11683 "00000100" // /* MW 3 */
+ 11684 "01101001" // /* MW 2 */
+ 11685 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21 first
+ 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11687 "10010000" // /* MW 3 */
+ 11688 "00110010" // /* MW 2 */
+ 11689 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11691 "10010000" // /* MW 3 */
+ 11692 "10110110" // /* MW 2 */
+ 11693 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.src_ref 10 "softfloat.c" 816 9
+.src_ref 10 "softfloat.c" 817 9
+ 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11695 "00011101" // /* MW 5 */
+ 11696 "10100000" // /* MW 4 */
+ 11697 "10011001" // /* MW 3 */
+ 11698 "00100000" // /* MW 2 */
+ 11699 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 816 9 first
+ 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11701 "00111101" // /* MW 3 */
+ 11702 "00100011" // /* MW 2 */
+ 11703 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 815 19 first
+.src_ref 10 "softfloat.c" 818 11
+.src_ref 10 "softfloat.c" 819 17
+.src_ref 10 "softfloat.c" 843 31
+ 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00100000" // /* MW 4 */
+ 11707 "00111100" // /* MW 3 */
+ 11708 "10110010" // /* MW 2 */
+ 11709 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 11 first
+ 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00101010" // /* MW 3 */
+ 11712 "00001011" // /* MW 2 */
+ 11713 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 4
+ 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */
+ 11715 "00000001" // /* MW 5 */
+ 11716 "01000000" // /* MW 4 */
+ 11717 "01000000" // /* MW 3 */
+ 11718 "00010111" // /* MW 2 */
+ 11719 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 817 9 first
+.delay_slot
+ 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11721 "00111101" // /* MW 3 */
+ 11722 "00100001" // /* MW 2 */
+ 11723 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14
+.src_ref 10 "softfloat.c" 851 14
+.src_ref 10 "softfloat.c" 859 13
+.src_ref 10 "softfloat.c" 862 9
+.delay_slot
+ 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11725 "00010000" // /* MW 9 */
+ 11726 "00000000" // /* MW 8 */
+ 11727 "10001000" // /* MW 7 */
+ 11728 "00000000" // /* MW 6 */
+ 11729 "00000000" // /* MW 5 */
+ 11730 "00010000" // /* MW 4 */
+ 11731 "00000000" // /* MW 3 */
+ 11732 "11100000" // /* MW 2 */
+ 11733 "00011111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 851 14 first
+.delay_slot
+ 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000111" // /* MW 3 */
+ 11736 "11101000" // /* MW 2 */
+ 11737 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 862 9 first
+.delay_slot
+ 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11739 "01000101" // /* MW 3 */
+ 11740 "01100110" // /* MW 2 */
+ 11741 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 859 13 first
+.delay_slot
+ 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11743 "00000101" // /* MW 3 */
+ 11744 "00001001" // /* MW 2 */
+ 11745 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 17 first
+ 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11747 "10001001" // /* MW 3 */
+ 11748 "10001101" // /* MW 2 */
+ 11749 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 4
+ 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */
+ 11751 "00000001" // /* MW 5 */
+ 11752 "01000000" // /* MW 4 */
+ 11753 "10010000" // /* MW 3 */
+ 11754 "00010111" // /* MW 2 */
+ 11755 "00110000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.src_ref 10 "softfloat.c" 835 34
+.delay_slot
+ 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11757 "00000101" // /* MW 3 */
+ 11758 "00001010" // /* MW 2 */
+ 11759 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 34 first
+.delay_slot
+ 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11761 "01010110" // /* MW 3 */
+ 11762 "11001110" // /* MW 2 */
+ 11763 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14 first
+ 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "00000111" // /* MW 3 */
+ 11772 "01101000" // /* MW 2 */
+ 11773 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 4
+ 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */
+ 11775 "00000001" // /* MW 5 */
+ 11776 "01000000" // /* MW 4 */
+ 11777 "11001000" // /* MW 3 */
+ 11778 "00010111" // /* MW 2 */
+ 11779 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11781 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11791 "10100000" // /* MW 3 */
+ 11792 "01010011" // /* MW 2 */
+ 11793 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 838 8 first
+ 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11795 "00000111" // /* MW 3 */
+ 11796 "10100000" // /* MW 2 */
+ 11797 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+ 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11799 "00100010" // /* MW 3 */
+ 11800 "00100001" // /* MW 2 */
+ 11801 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 843 31 first
+ 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11803 "00000001" // /* MW 3 */
+ 11804 "00100001" // /* MW 2 */
+ 11805 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */
+ 11807 "00000001" // /* MW 5 */
+ 11808 "00000000" // /* MW 4 */
+ 11809 "00110000" // /* MW 3 */
+ 11810 "00010111" // /* MW 2 */
+ 11811 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+.delay_slot
+ 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00110010" // /* MW 3 */
+ 11814 "01100011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11817 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11819 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11821 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11823 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11825 "01011000" // /* MW 9 */
+ 11826 "00011111" // /* MW 8 */
+ 11827 "01001000" // /* MW 7 */
+ 11828 "00001110" // /* MW 6 */
+ 11829 "00111000" // /* MW 5 */
+ 11830 "00110000" // /* MW 4 */
+ 11831 "00000000" // /* MW 3 */
+ 11832 "00010100" // /* MW 2 */
+ 11833 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11835 "00100100" // /* MW 3 */
+ 11836 "11100101" // /* MW 2 */
+ 11837 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11839 "00101101" // /* MW 3 */
+ 11840 "01100101" // /* MW 2 */
+ 11841 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11843 "11110000" // /* MW 3 */
+ 11844 "01100110" // /* MW 2 */
+ 11845 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01001010" // /* MW 3 */
+ 11848 "00110111" // /* MW 2 */
+ 11849 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+ 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00111101" // /* MW 3 */
+ 11852 "01100010" // /* MW 2 */
+ 11853 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+ 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11110000" // /* MW 3 */
+ 11856 "10100100" // /* MW 2 */
+ 11857 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25
+ 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11859 "00100101" // /* MW 3 */
+ 11860 "01100001" // /* MW 2 */
+ 11861 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11863 "00000010" // /* MW 9 */
+ 11864 "11100011" // /* MW 8 */
+ 11865 "00000100" // /* MW 7 */
+ 11866 "00000000" // /* MW 6 */
+ 11867 "01011011" // /* MW 5 */
+ 11868 "00000001" // /* MW 4 */
+ 11869 "11110000" // /* MW 3 */
+ 11870 "00101100" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */
+ 11873 "00000000" // /* MW 5 */
+ 11874 "00000000" // /* MW 4 */
+ 11875 "10000000" // /* MW 3 */
+ 11876 "00010111" // /* MW 2 */
+ 11877 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16 first
+.delay_slot
+ 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11879 "00010001" // /* MW 3 */
+ 11880 "00000111" // /* MW 2 */
+ 11881 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11883 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "01111000" // /* MW 13 */
+ 11892 "10100101" // /* MW 12 */
+ 11893 "00000001" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_240
+.src_ref 10 "softfloat.c" 851 4 first
+ 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */
+ 11905 "00000001" // /* MW 5 */
+ 11906 "01000000" // /* MW 4 */
+ 11907 "11100000" // /* MW 3 */
+ 11908 "00010111" // /* MW 2 */
+ 11909 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11911 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11913 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11915 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11917 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "10100000" // /* MW 3 */
+ 11922 "00011101" // /* MW 2 */
+ 11923 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "10100000" // /* MW 3 */
+ 11926 "01010001" // /* MW 2 */
+ 11927 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4
+.src_ref 10 "softfloat.c" 855 14
+ 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "10100000" // /* MW 3 */
+ 11930 "11011100" // /* MW 2 */
+ 11931 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00100000" // /* MW 3 */
+ 11934 "01010000" // /* MW 2 */
+ 11935 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 856 8 first
+ 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11937 "11111111" // /* MW 3 */
+ 11938 "10100011" // /* MW 2 */
+ 11939 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+ 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11941 "00100010" // /* MW 3 */
+ 11942 "01100011" // /* MW 2 */
+ 11943 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */
+ 11945 "00000001" // /* MW 5 */
+ 11946 "00000000" // /* MW 4 */
+ 11947 "01111000" // /* MW 3 */
+ 11948 "00010111" // /* MW 2 */
+ 11949 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+.delay_slot
+ 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11951 "01000010" // /* MW 3 */
+ 11952 "00100000" // /* MW 2 */
+ 11953 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11957 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11959 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11961 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11963 "01011000" // /* MW 9 */
+ 11964 "00011111" // /* MW 8 */
+ 11965 "10001000" // /* MW 7 */
+ 11966 "10001110" // /* MW 6 */
+ 11967 "00101000" // /* MW 5 */
+ 11968 "00110001" // /* MW 4 */
+ 11969 "00000000" // /* MW 3 */
+ 11970 "00000011" // /* MW 2 */
+ 11971 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11973 "01000100" // /* MW 3 */
+ 11974 "10101001" // /* MW 2 */
+ 11975 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11977 "01001101" // /* MW 3 */
+ 11978 "00101001" // /* MW 2 */
+ 11979 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11981 "00101101" // /* MW 3 */
+ 11982 "00100101" // /* MW 2 */
+ 11983 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11985 "00111010" // /* MW 3 */
+ 11986 "01110110" // /* MW 2 */
+ 11987 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "11110000" // /* MW 3 */
+ 11990 "00101000" // /* MW 2 */
+ 11991 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "11110000" // /* MW 3 */
+ 11994 "00100000" // /* MW 2 */
+ 11995 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11997 "01000101" // /* MW 3 */
+ 11998 "10100011" // /* MW 2 */
+ 11999 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12001 "00000000" // /* MW 15 */
+ 12002 "00000000" // /* MW 14 */
+ 12003 "01111000" // /* MW 13 */
+ 12004 "10100101" // /* MW 12 */
+ 12005 "00000001" // /* MW 11 */
+ 12006 "10010000" // /* MW 10 */
+ 12007 "00001000" // /* MW 9 */
+ 12008 "00100001" // /* MW 8 */
+ 12009 "01011011" // /* MW 7 */
+ 12010 "00000001" // /* MW 6 */
+ 12011 "00100000" // /* MW 5 */
+ 12012 "00000000" // /* MW 4 */
+ 12013 "11110000" // /* MW 3 */
+ 12014 "00101100" // /* MW 2 */
+ 12015 "00000000" // /* MW 1 */
+.label __ll1__ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 864 16 first
+ 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12017 "00000000" // /* MW 15 */
+ 12018 "00000000" // /* MW 14 */
+ 12019 "01111000" // /* MW 13 */
+ 12020 "10100101" // /* MW 12 */
+ 12021 "00000001" // /* MW 11 */
+ 12022 "00001100" // /* MW 10 */
+ 12023 "00111000" // /* MW 9 */
+ 12024 "00100110" // /* MW 8 */
+ 12025 "01011011" // /* MW 7 */
+ 12026 "00000001" // /* MW 6 */
+ 12027 "00100000" // /* MW 5 */
+ 12028 "00000000" // /* MW 4 */
+ 12029 "11110000" // /* MW 3 */
+ 12030 "00101100" // /* MW 2 */
+ 12031 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.src_ref 10 "softfloat.c" 868 11 first
+.tail_call
+ 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 12033 "00000000" // /* MW 5 */
+ 12034 "00000000" // /* MW 4 */
+ 12035 "01000000" // /* MW 3 */
+ 12036 "00010101" // /* MW 2 */
+ 12037 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4 first
+.delay_slot
+ 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "11111111" // /* MW 3 */
+ 12040 "01000101" // /* MW 2 */
+ 12041 "00010110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12049 "00000000" // /* MW 15 */
+ 12050 "00000000" // /* MW 14 */
+ 12051 "01111000" // /* MW 13 */
+ 12052 "10100101" // /* MW 12 */
+ 12053 "00000001" // /* MW 11 */
+ 12054 "00000000" // /* MW 10 */
+ 12055 "00000000" // /* MW 9 */
+ 12056 "00000000" // /* MW 8 */
+ 12057 "01011011" // /* MW 7 */
+ 12058 "00000001" // /* MW 6 */
+ 12059 "00100000" // /* MW 5 */
+ 12060 "00000000" // /* MW 4 */
+ 12061 "11110000" // /* MW 3 */
+ 12062 "00101100" // /* MW 2 */
+ 12063 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 820 4 first
+.return_address
+ 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */
+ 12065 "00000001" // /* MW 5 */
+ 12066 "01000000" // /* MW 4 */
+ 12067 "11110000" // /* MW 3 */
+ 12068 "00010111" // /* MW 2 */
+ 12069 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12079 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 14 first
+ 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12081 "00011100" // /* MW 3 */
+ 12082 "00100111" // /* MW 2 */
+ 12083 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 4
+ 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12085 "00000001" // /* MW 5 */
+ 12086 "01000000" // /* MW 4 */
+ 12087 "00001000" // /* MW 3 */
+ 12088 "00011000" // /* MW 2 */
+ 12089 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4 first
+.delay_slot
+ 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12091 "10010010" // /* MW 3 */
+ 12092 "01110001" // /* MW 2 */
+ 12093 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12095 "10100000" // /* MW 3 */
+ 12096 "10011101" // /* MW 2 */
+ 12097 "00011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12099 "00100010" // /* MW 3 */
+ 12100 "01110011" // /* MW 2 */
+ 12101 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12103 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12105 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 14 first
+ 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00001100" // /* MW 3 */
+ 12108 "01100101" // /* MW 2 */
+ 12109 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 4
+ 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */
+ 12111 "00000001" // /* MW 5 */
+ 12112 "01000000" // /* MW 4 */
+ 12113 "00011000" // /* MW 3 */
+ 12114 "00011000" // /* MW 2 */
+ 12115 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12125 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12127 "01111101" // /* MW 3 */
+ 12128 "00100000" // /* MW 2 */
+ 12129 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24
+ 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12131 "11101000" // /* MW 5 */
+ 12132 "11001001" // /* MW 4 */
+ 12133 "11000000" // /* MW 3 */
+ 12134 "00000111" // /* MW 2 */
+ 12135 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24 first
+ 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "01010110" // /* MW 3 */
+ 12138 "00000110" // /* MW 2 */
+ 12139 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12143 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 4
+ 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12145 "00000000" // /* MW 3 */
+ 12146 "00101000" // /* MW 2 */
+ 12147 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12149 "00001101" // /* MW 3 */
+ 12150 "00100010" // /* MW 2 */
+ 12151 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12155 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12157 "00100111" // /* MW 3 */
+ 12158 "01100011" // /* MW 2 */
+ 12159 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12161 "00000000" // /* MW 15 */
+ 12162 "00000000" // /* MW 14 */
+ 12163 "01111000" // /* MW 13 */
+ 12164 "10100101" // /* MW 12 */
+ 12165 "00000001" // /* MW 11 */
+ 12166 "01101100" // /* MW 10 */
+ 12167 "00001000" // /* MW 9 */
+ 12168 "00100010" // /* MW 8 */
+ 12169 "01011011" // /* MW 7 */
+ 12170 "00000001" // /* MW 6 */
+ 12171 "00100000" // /* MW 5 */
+ 12172 "00000000" // /* MW 4 */
+ 12173 "11110000" // /* MW 3 */
+ 12174 "00101100" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 834 8 first
+ 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */
+ 12177 "00000001" // /* MW 5 */
+ 12178 "01000000" // /* MW 4 */
+ 12179 "00101000" // /* MW 3 */
+ 12180 "00011000" // /* MW 2 */
+ 12181 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12193 "01111101" // /* MW 3 */
+ 12194 "00100000" // /* MW 2 */
+ 12195 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 8 first
+ 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12197 "00000000" // /* MW 3 */
+ 12198 "00101000" // /* MW 2 */
+ 12199 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12201 "00001101" // /* MW 3 */
+ 12202 "11100001" // /* MW 2 */
+ 12203 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12205 "00000000" // /* MW 5 */
+ 12206 "10100000" // /* MW 4 */
+ 12207 "00001000" // /* MW 3 */
+ 12208 "10000000" // /* MW 2 */
+ 12209 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12211 "00000000" // /* MW 3 */
+ 12212 "01000001" // /* MW 2 */
+ 12213 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 12217 "00011100" // /* MW 7 */
+ 12218 "00000000" // /* MW 6 */
+ 12219 "00000000" // /* MW 5 */
+ 12220 "00000100" // /* MW 4 */
+ 12221 "11110000" // /* MW 3 */
+ 12222 "00101100" // /* MW 2 */
+ 12223 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 852 8 first
+ 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */
+ 12225 "00000001" // /* MW 5 */
+ 12226 "01000000" // /* MW 4 */
+ 12227 "00110000" // /* MW 3 */
+ 12228 "00011000" // /* MW 2 */
+ 12229 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12233 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12235 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12237 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 853 8 first
+ 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12241 "00000000" // /* MW 3 */
+ 12242 "00101000" // /* MW 2 */
+ 12243 "00010000" // /* MW 1 */
+.delay_slot
+ 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "10100000" // /* MW 3 */
+ 12246 "00010000" // /* MW 2 */
+ 12247 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 821 18 first
+ 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00000101" // /* MW 3 */
+ 12258 "01100001" // /* MW 2 */
+ 12259 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 821 8
+ 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */
+ 12261 "00000001" // /* MW 5 */
+ 12262 "01000000" // /* MW 4 */
+ 12263 "00111000" // /* MW 3 */
+ 12264 "00011000" // /* MW 2 */
+ 12265 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12267 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12269 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12271 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12275 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 823 8 first
+ 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12277 "00000000" // /* MW 3 */
+ 12278 "00101000" // /* MW 2 */
+ 12279 "00010000" // /* MW 1 */
+.delay_slot
+ 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12281 "11111110" // /* MW 5 */
+ 12282 "00111111" // /* MW 4 */
+ 12283 "11110000" // /* MW 3 */
+ 12284 "11111111" // /* MW 2 */
+ 12285 "01111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "10000001" // /* MW 11 */
+ 12294 "10101101" // /* MW 10 */
+ 12295 "00000000" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */
+ 12305 "00000000" // /* MW 5 */
+ 12306 "00000000" // /* MW 4 */
+ 12307 "01111000" // /* MW 3 */
+ 12308 "00010111" // /* MW 2 */
+ 12309 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12311 "10100000" // /* MW 3 */
+ 12312 "01010001" // /* MW 2 */
+ 12313 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 864 16
+.delay_slot
+ 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12315 "10100000" // /* MW 3 */
+ 12316 "11011000" // /* MW 2 */
+ 12317 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12319 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12321 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12323 "00011100" // /* MW 13 */
+ 12324 "00000000" // /* MW 12 */
+ 12325 "00000000" // /* MW 11 */
+ 12326 "01010111" // /* MW 10 */
+ 12327 "00011010" // /* MW 9 */
+ 12328 "01000000" // /* MW 8 */
+ 12329 "00000000" // /* MW 7 */
+ 12330 "00000000" // /* MW 6 */
+ 12331 "10110110" // /* MW 5 */
+ 12332 "00000010" // /* MW 4 */
+ 12333 "11110000" // /* MW 3 */
+ 12334 "00101100" // /* MW 2 */
+ 12335 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */
+ 12337 "00000000" // /* MW 5 */
+ 12338 "00000000" // /* MW 4 */
+ 12339 "00110000" // /* MW 3 */
+ 12340 "00010111" // /* MW 2 */
+ 12341 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16
+.delay_slot
+ 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12343 "00100000" // /* MW 3 */
+ 12344 "00011000" // /* MW 2 */
+ 12345 "00011001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+.delay_slot
+ 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "01011100" // /* MW 2 */
+ 12349 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12351 "10100000" // /* MW 3 */
+ 12352 "01010011" // /* MW 2 */
+ 12353 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12357 "10000001" // /* MW 11 */
+ 12358 "10101101" // /* MW 10 */
+ 12359 "00000000" // /* MW 9 */
+ 12360 "00000000" // /* MW 8 */
+ 12361 "00000000" // /* MW 7 */
+ 12362 "00000000" // /* MW 6 */
+ 12363 "00100000" // /* MW 5 */
+ 12364 "00000000" // /* MW 4 */
+ 12365 "11110000" // /* MW 3 */
+ 12366 "00101100" // /* MW 2 */
+ 12367 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.src_ref 10 "softfloat.c" 834 27 first
+.tail_call
+ 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12369 "00000000" // /* MW 5 */
+ 12370 "00000000" // /* MW 4 */
+ 12371 "01000000" // /* MW 3 */
+ 12372 "00010100" // /* MW 2 */
+ 12373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12375 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12377 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12383 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.src_ref 10 "softfloat.c" 852 27 first
+.tail_call
+.return_address
+ 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12385 "00000000" // /* MW 5 */
+ 12386 "00000000" // /* MW 4 */
+ 12387 "01000000" // /* MW 3 */
+ 12388 "00010100" // /* MW 2 */
+ 12389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12399 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.src_ref 10 "softfloat.c" 821 34 first
+.tail_call
+.return_address
+ 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12401 "00000000" // /* MW 5 */
+ 12402 "00000000" // /* MW 4 */
+ 12403 "01000000" // /* MW 3 */
+ 12404 "00010100" // /* MW 2 */
+ 12405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14subFloat32Sigsjji__end
+ 12415 "00000000" // /* MW 1 */
+.label float32_add
+.function float32_add float32_add
+.src_ref 10 "softfloat.c" 92 12
+.src_ref 10 "softfloat.c" 878 first
+.function_start
+ 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "10000101" // /* MW 3 */
+ 12418 "11100000" // /* MW 2 */
+ 12419 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12 first
+ 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12421 "00001101" // /* MW 3 */
+ 12422 "01000111" // /* MW 2 */
+ 12423 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12
+ 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00001101" // /* MW 3 */
+ 12426 "10100001" // /* MW 2 */
+ 12427 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 15 first
+ 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00000111" // /* MW 3 */
+ 12430 "11100001" // /* MW 2 */
+ 12431 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 4
+ 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */
+ 12433 "00000001" // /* MW 5 */
+ 12434 "01000000" // /* MW 4 */
+ 12435 "01011000" // /* MW 3 */
+ 12436 "00011000" // /* MW 2 */
+ 12437 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 888 15 first
+.tail_call
+ 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */
+ 12449 "00000000" // /* MW 5 */
+ 12450 "00000000" // /* MW 4 */
+ 12451 "11001000" // /* MW 3 */
+ 12452 "00010110" // /* MW 2 */
+ 12453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12455 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12461 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12463 "00000000" // /* MW 1 */
+.label TGT_Ffloat32_add_48
+.src_ref 10 "softfloat.c" 885 15 first
+.tail_call
+.return_address
+ 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 12465 "00000000" // /* MW 5 */
+ 12466 "00000000" // /* MW 4 */
+ 12467 "10010000" // /* MW 3 */
+ 12468 "00010101" // /* MW 2 */
+ 12469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label float32_add__end
+ 12479 "00000000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmico
new file mode 100644
index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.cmico
@@ -0,0 +1 @@
++Mdec
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.lst
new file mode 100644
index 0000000000000000000000000000000000000000..da538ba51f010cb935d6faf7c98cc539440d5b5d
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.lst
@@ -0,0 +1,4815 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+
+.text_segment PM 2352
+.entry_point
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function_start
+ 2352 0x00 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p0]; MOV r0, r15
+ 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 2364 0xff 0x73 0xb0 0x01 0xe8 0x50 0x70 0x02 ST p7, [sp, #-8]; MOV r15, r1
+ 2372 0xff 0x82 0xb0 0x1f 0xa7 0x83 0xb0 0x60 0x79 0x3a ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0
+ 2382 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+ 2386 0x00 0x00 NOPX
+ 2388 0x00 0x00 NOPX
+ 2390 0x18 0x68 0x02 0x18 ADD.NC p0, r16, #4
+ 2394 0x00 0x1e 0x16 0x98 LDA r16, [p0], #4
+ 2398 0x00 0x3e 0x56 0x98 LDA r18, [p0], #12
+ 2402 0x00 0xee 0x36 0x98 LDA r17, [p0], #-8
+ 2406 0x00 0x07 0x76 0x98 LDA r27, [p0]
+ 2410 0x00 0x00 NOPX
+ 2412 0x00 0x00 NOPX
+ 2414 0x00 0x00 NOPX
+ 2416 0x00 0x00 NOPX
+ 2418 0x00 0x00 NOPX
+ 2420 0x00 0x00 NOPX
+ 2422 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27
+ 2426 0x08 0xd6 0x11 0x98 ST r16, [p0, #-12]
+ 2430 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 2434 0x00 0x00 NOPX
+ 2436 0x00 0x00 NOPX
+ 2438 0x00 0x00 NOPX
+ 2440 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26
+ 2444 0x00 0x00 NOPX
+ 2446 0x00 0x00 NOPX
+ 2448 0x00 0x00 NOPX
+ 2450 0x07 0x2c 0x1e 0x98 LDA p0, [p7], #8
+ 2454 0x07 0xfc 0x9e 0x98 LDA p1, [p7], #-4
+ 2458 0x07 0x05 0x1e 0x98 LDA p2, [p7]
+.no_stack_arguments
+ 2462 0x00 0x0e 0xb8 0x00 0x01 0x04 JL #7536
+.delay_slot
+ 2468 0x0f 0xf3 0x55 0x98 ST r26, [sp, #-16]
+.delay_slot
+.swstall delay_slot
+ 2472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2474 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2476 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2478 0x00 0x00 NOPX
+.return_address
+ 2480 0x07 0xf6 0x16 0x98 LDA r16, [p7, #-4]
+ 2484 0x07 0xf3 0x51 0x18 LDA r26, [sp, #-16]
+ 2488 0x00 0x00 NOPX
+ 2490 0x00 0x00 NOPX
+ 2492 0x00 0x00 NOPX
+ 2494 0x00 0x00 NOPX
+ 2496 0x00 0x00 NOPX
+ 2498 0x18 0x68 0x08 0x18 ADD.NC p0, r16, #16
+ 2502 0x00 0x06 0x16 0x98 LDA r16, [p0]
+ 2506 0x10 0x22 0x05 0x18 MOVX r17, #1
+ 2510 0x00 0x00 NOPX
+ 2512 0x00 0x00 NOPX
+ 2514 0x00 0x00 NOPX
+ 2516 0x00 0x00 NOPX
+ 2518 0x00 0x00 NOPX
+ 2520 0x14 0x15 0x18 0x18 REL.COND r16, r17, r26
+ 2524 0xfe 0x87 0x2d 0xaf 0x41 0xd4 LDA lr, [sp, #-12]; MOV r27, r15
+ 2530 0x00 0xf6 0x16 0x98 LDA r16, [p0, #-4]
+ 2534 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+ 2538 0x00 0x00 NOPX
+ 2540 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+ 2544 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 2550 0x00 0x00 NOPX
+ 2552 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 2556 0x14 0x63 0x01 0x98 SUB r17, r17, r16
+.delay_slot
+ 2560 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27
+.delay_slot
+ 2564 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4]
+.delay_slot
+.swstall delay_slot
+ 2568 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2570 0x00 0x00 NOPX
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+
+.text_segment PM 2576
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function_start
+ 2576 0x23 0x8e 0xd3 0x80 0x8b 0x3e 0x67 0x68 0x09 0x60 0x78 0x76 LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1
+ 2588 0x02 0x07 0x00 0x3e 0x25 0x09 0x30 0x07 0x08 0xba MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28
+ 2598 0x00 0x7e 0x00 0x3e 0x17 0xa8 0x08 0x60 0x78 0xba MOVA r30, #3; MOVX r1, #-3; MOV r0, p0
+ 2608 0xff 0xe5 0x00 0x00 0x00 0x3c 0x8f 0xfc 0x10 0xba MOVA r5, #-1; MOVXM r4, #65528
+ 2618 0xff 0x90 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r16, #-4; PADDXM [sp], #64
+ 2628 0x1c 0x60 0x17 0x18 ADD.NC p4, r0, #46
+ 2632 0x00 0x00 NOPX
+ 2634 0x08 0x1c 0x71 0x98 ST r3, [p0], #4
+ 2638 0x01 0x1f 0x56 0x98 LDA r26, [p1], #4
+ 2642 0x00 0x00 NOPX
+ 2644 0x00 0x00 NOPX
+ 2646 0x00 0x00 NOPX
+ 2648 0x00 0x00 NOPX
+ 2650 0x00 0x00 NOPX
+ 2652 0x00 0x00 NOPX
+ 2654 0x03 0xea 0x3d 0x44 0x89 0x5c ST r26, [p0], #4; AND r17, r26, r4
+ 2660 0x23 0xf6 0xd0 0x06 0x4d 0x7e 0xcc 0x48 0xa8 0xba LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4
+ 2670 0x16 0xa4 0x6d 0x98 LSHL r18, r26, r6
+ 2674 0x11 0x0c 0x1d 0x98 LSHL r6, r4, r1
+ 2678 0xd4 0x43 0xb0 0xb2 0xff 0x24 LSHL r17, r26, r1; ADD.NC r1, r18, #-1
+ 2684 0x00 0x00 NOPX
+ 2686 0x00 0x00 NOPX
+ 2688 0x00 0x00 NOPX
+ 2690 0x03 0xf6 0x3e 0x9c 0x4c 0x5c ST r29, [p0], #4; MAC r7, r7, r29, r2
+ 2696 0x23 0x8a 0xd7 0xff 0xb5 0x80 0x07 0x49 0xaf 0xfa LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26
+ 2706 0x10 0xe9 0xdf 0x98 MUL r20, r3, r29
+ 2710 0x10 0xf8 0x4f 0x98 MUL r28, r3, r4
+ 2714 0x17 0x6b 0xed 0x98 LSHL r21, r29, r30
+ 2718 0xec 0x8b 0xbd 0xb5 0xd0 0x24 LSHL r18, r29, r5; ADD.NC r27, r21, #-48
+ 2724 0x14 0xaf 0xff 0x18 ADD r23, r18, #-1
+ 2728 0x17 0x7b 0x6f 0x98 MUL r29, r29, r22
+ 2732 0x03 0x8a 0x3f 0x60 0x55 0x5c ST r2, [p0], #4; LT r24, r30, r2
+ 2738 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 2742 0x00 0x00 NOPX
+ 2744 0x00 0x00 NOPX
+ 2746 0x00 0x00 NOPX
+ 2748 0x00 0x00 NOPX
+ 2750 0x00 0x00 NOPX
+ 2752 0x00 0x00 NOPX
+ 2754 0x03 0x85 0x30 0x03 0xf0 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r31, el0
+ 2762 0x01 0x04 0x0e 0x98 LDA eh0, [p1]
+ 2766 0x00 0x00 NOPX
+ 2768 0x00 0x00 NOPX
+ 2770 0x00 0x00 NOPX
+ 2772 0x00 0x00 NOPX
+ 2774 0x00 0x00 NOPX
+ 2776 0x00 0x00 NOPX
+ 2778 0x00 0x81 0x30 0x03 0x30 0x8e 0x70 0x02 ST eh0, [p0]; MOV r25, eh0
+ 2786 0x01 0x17 0xd6 0x98 LDA r30, [p1, #4]
+ 2790 0x00 0x00 NOPX
+ 2792 0xc0 0x05 0xb0 0x40 0x01 0x84 JNZ r24, #2912
+.delay_slot
+ 2798 0x17 0x27 0x0d 0x98 LSHL r19, r28, r16
+.delay_slot
+ 2802 0x17 0xf3 0x9f 0x98 MUL r25, r31, r25
+.delay_slot
+ 2806 0xa5 0x0b 0xb2 0xb1 0xff 0x24 LSHL r20, r20, r5; ADD.NC r5, r17, #-1
+.delay_slot
+ 2812 0x11 0x21 0x0d 0x98 LSHL r16, r4, r16
+.delay_slot
+ 2816 0x02 0xfa 0x3c 0xff 0xdf 0x5c ST r30, [p0, #4]; MUL r31, r25, r30
+ 2822 0x10 0x38 0x05 0x18 MOVX r28, #1
+ 2826 0x10 0xb9 0xc7 0x98 EQ r28, r2, r28
+ 2830 0xe0 0x07 0xe0 0x40 0x01 0x84 JNZ r28, #4032
+.delay_slot
+.swstall delay_slot
+ 2836 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2838 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2840 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2842 0x00 0x00 NOPX
+.delay_slot
+ 2844 0x10 0xed 0xff 0x18 ADD r22, r3, #-1
+ 2848 0x10 0x22 0x09 0x18 MOVX r17, #2
+ 2852 0x14 0x62 0x27 0x98 EQ r17, r17, r2
+ 2856 0x88 0x07 0xa0 0x40 0x01 0x84 JNZ r17, #3904
+.delay_slot
+.swstall delay_slot
+ 2862 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2864 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2866 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2868 0x00 0x00 NOPX
+.delay_slot
+ 2870 0x10 0x0e 0x0d 0x18 MOVX r7, #3
+ 2874 0x11 0xc4 0x27 0x98 EQ r2, r7, r2
+ 2878 0x10 0x07 0x50 0x40 0x01 0x84 JNZ r2, #3744
+.delay_slot
+.swstall delay_slot
+ 2884 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2886 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2888 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2890 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2892 0x00 0x00 NOPX
+ 2894 0x00 0x06 0xf0 0x00 0x00 0x84 J #3552
+.delay_slot
+ 2900 0x10 0x34 0x11 0x18 MOVX r26, #4
+.delay_slot
+.swstall delay_slot
+ 2904 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2906 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2908 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2910 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+ 2912 0x10 0x3a 0x15 0x18 MOVX r29, #5
+ 2916 0x17 0x70 0x2a 0x98 LT r24, r29, r2
+ 2920 0xc0 0x06 0x50 0x40 0x01 0x84 JNZ r24, #3232
+.delay_slot
+.swstall delay_slot
+ 2926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2932 0x00 0x00 NOPX
+.delay_slot
+ 2934 0x10 0x34 0x11 0x18 MOVX r26, #4
+ 2938 0x16 0xa2 0x27 0x98 EQ r17, r26, r2
+ 2942 0x88 0x06 0x10 0x40 0x01 0x84 JNZ r17, #3104
+.delay_slot
+.swstall delay_slot
+ 2948 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2950 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2952 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2954 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2956 0x00 0x00 NOPX
+ 2958 0x17 0x44 0x28 0x98 NE r2, r29, r2
+ 2962 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552
+.delay_slot
+.swstall delay_slot
+ 2968 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2970 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2972 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2974 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2976 0x00 0x00 NOPX
+ 2978 0x83 0xd6 0xe0 0x00 0x22 0x08 0x07 0xec 0x58 0xba ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20
+ 2988 0x1f 0x9c 0xa0 0xf8 MOV r30, r25
+ 2992 0x00 0x00 NOPX
+ 2994 0x00 0x00 NOPX
+ 2996 0x00 0x00 NOPX
+ 2998 0x00 0x00 NOPX
+ 3000 0x00 0x00 NOPX
+ 3002 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2
+ 3006 0x00 0x00 NOPX
+ 3008 0x00 0x00 NOPX
+ 3010 0x00 0x00 NOPX
+ 3012 0x00 0x00 NOPX
+ 3014 0x00 0x00 NOPX
+ 3016 0x00 0x00 NOPX
+ 3018 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 3022 0x00 0x00 NOPX
+ 3024 0x00 0x00 NOPX
+ 3026 0x00 0x00 NOPX
+ 3028 0x00 0x00 NOPX
+ 3030 0x00 0x00 NOPX
+ 3032 0x00 0x00 NOPX
+ 3034 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3038 0x00 0x00 NOPX
+ 3040 0x00 0x00 NOPX
+ 3042 0x00 0x00 NOPX
+ 3044 0x00 0x00 NOPX
+ 3046 0x00 0x00 NOPX
+ 3048 0x00 0x00 NOPX
+ 3050 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3054 0x00 0x00 NOPX
+ 3056 0x00 0x00 NOPX
+ 3058 0x00 0x00 NOPX
+ 3060 0x00 0x00 NOPX
+ 3062 0x00 0x00 NOPX
+ 3064 0x00 0x00 NOPX
+ 3066 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0
+ 3070 0x00 0x00 NOPX
+ 3072 0x00 0x00 NOPX
+ 3074 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.swstall delay_slot
+ 3080 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3082 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3084 0x00 0x00 NOPX
+.delay_slot
+ 3086 0x0c 0x06 0x51 0x98 ST r18, [p4]
+.delay_slot
+ 3090 0x00 0x2c 0xf8 0x29 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r6, [p4, #4]; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+ 3104 0x83 0x92 0xe0 0x3e 0x67 0xa8 0x48 0x10 0x58 0xba ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16
+ 3114 0xfd 0x80 0x80 0x0c 0x22 0x33 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0
+ 3124 0x00 0x00 NOPX
+ 3126 0x00 0x00 NOPX
+ 3128 0x00 0x00 NOPX
+ 3130 0x00 0x00 NOPX
+ 3132 0x00 0x00 NOPX
+ 3134 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3138 0x00 0x00 NOPX
+ 3140 0x00 0x00 NOPX
+ 3142 0x00 0x00 NOPX
+ 3144 0x00 0x00 NOPX
+ 3146 0x00 0x00 NOPX
+ 3148 0x00 0x00 NOPX
+ 3150 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 3154 0x00 0x00 NOPX
+ 3156 0x00 0x00 NOPX
+ 3158 0x00 0x00 NOPX
+ 3160 0x00 0x00 NOPX
+ 3162 0x00 0x00 NOPX
+ 3164 0x00 0x00 NOPX
+ 3166 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3170 0x00 0x00 NOPX
+ 3172 0x00 0x00 NOPX
+ 3174 0x00 0x00 NOPX
+ 3176 0x00 0x00 NOPX
+ 3178 0x00 0x00 NOPX
+ 3180 0x00 0x00 NOPX
+ 3182 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3186 0x00 0x00 NOPX
+ 3188 0x00 0x00 NOPX
+ 3190 0x00 0x00 NOPX
+ 3192 0x00 0x00 NOPX
+ 3194 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 0x10 0x02 0x41 0x18 MOVX r1, #16
+.delay_slot
+.swstall delay_slot
+ 3216 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3218 0x00 0x00 NOPX
+.delay_slot
+ 3220 0x0c 0x14 0x71 0x98 ST r3, [p4, #4]
+.delay_slot
+ 3224 0x80 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p4]; NOPM
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+ 3232 0xff 0x8e 0x20 0x10 0x32 0x2c LDA r3, [sp, #-4]; MOVX r4, #6
+ 3238 0x10 0x88 0x47 0x98 EQ r4, r2, r4
+ 3242 0x20 0x06 0xa8 0x40 0x01 0x84 JNZ r4, #3408
+.delay_slot
+ 3248 0x10 0x02 0x41 0x18 MOVX r1, #16
+.delay_slot
+.swstall delay_slot
+ 3252 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3254 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3256 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3258 0x00 0x00 NOPX
+ 3260 0x10 0x06 0x1d 0x18 MOVX r3, #7
+ 3264 0x10 0xc4 0x28 0x98 NE r2, r3, r2
+ 3268 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552
+.delay_slot
+.swstall delay_slot
+ 3274 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3276 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3282 0x00 0x00 NOPX
+ 3284 0x83 0x86 0xe0 0x26 0x2f 0xf8 0x07 0xec 0x58 0xba ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20
+ 3294 0xff 0x43 0x00 0x00 0x00 0x40 0x40 0x00 0x10 0xba MOVA r3, #-6; MOVXM dj0, #65536
+ 3304 0xe0 0xc7 0xbc 0x20 0x01 0x64 LSHL r3, r28, r3; MOV r24, #0
+ 3310 0x00 0x00 NOPX
+ 3312 0x00 0x00 NOPX
+ 3314 0x00 0x00 NOPX
+ 3316 0x00 0x00 NOPX
+ 3318 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3322 0x00 0x00 NOPX
+ 3324 0x00 0x00 NOPX
+ 3326 0x00 0x00 NOPX
+ 3328 0x00 0x00 NOPX
+ 3330 0x00 0x00 NOPX
+ 3332 0x00 0x00 NOPX
+ 3334 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3338 0x00 0x00 NOPX
+ 3340 0x00 0x00 NOPX
+ 3342 0x00 0x00 NOPX
+ 3344 0x00 0x00 NOPX
+ 3346 0x00 0x00 NOPX
+ 3348 0x00 0x00 NOPX
+ 3350 0x0c 0x1c 0x41 0x98 ST dj0, [p4], #4
+ 3354 0x04 0x0b 0x17 0x18 ST.s16 r24, [p4], m0
+ 3358 0x00 0x00 NOPX
+ 3360 0x00 0x00 NOPX
+ 3362 0x00 0x00 NOPX
+ 3364 0x00 0x00 NOPX
+ 3366 0x00 0x00 NOPX
+ 3368 0x00 0x00 NOPX
+ 3370 0x0c 0x07 0x51 0x98 ST r26, [p4]
+ 3374 0x0c 0x14 0x71 0x98 ST r3, [p4, #4]
+ 3378 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+ 3384 0x1f 0x9f 0xa0 0xf8 MOV r30, r31
+.delay_slot
+.swstall delay_slot
+ 3388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3390 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3394 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+ 3408 0x83 0x86 0xe0 0x06 0x2b 0x70 0x48 0x10 0x58 0xba ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16
+ 3418 0xfd 0x80 0x80 0x3e 0x47 0xa8 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0
+ 3428 0x10 0xc8 0x4d 0x98 LSHL r4, r3, r4
+ 3432 0xf7 0x8d 0xf1 0xa4 0xff 0x24 MUL r30, r30, r6; ADD.NC r3, r4, #-1
+ 3438 0x00 0x00 NOPX
+ 3440 0x00 0x00 NOPX
+ 3442 0x00 0x00 NOPX
+ 3444 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3448 0x00 0x00 NOPX
+ 3450 0x00 0x00 NOPX
+ 3452 0x00 0x00 NOPX
+ 3454 0x00 0x00 NOPX
+ 3456 0x00 0x00 NOPX
+ 3458 0x00 0x00 NOPX
+ 3460 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3464 0x00 0x00 NOPX
+ 3466 0x00 0x00 NOPX
+ 3468 0x00 0x00 NOPX
+ 3470 0x00 0x00 NOPX
+ 3472 0x00 0x00 NOPX
+ 3474 0x00 0x00 NOPX
+ 3476 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3480 0x00 0x00 NOPX
+ 3482 0x00 0x00 NOPX
+ 3484 0x00 0x00 NOPX
+ 3486 0x00 0x00 NOPX
+ 3488 0x00 0x00 NOPX
+ 3490 0x00 0x00 NOPX
+ 3492 0x04 0x1c 0x77 0x18 ST.s16 r3, [p4], #2
+ 3496 0x00 0x00 NOPX
+ 3498 0x00 0x00 NOPX
+ 3500 0x00 0x00 NOPX
+ 3502 0x00 0x00 NOPX
+ 3504 0x00 0x00 NOPX
+ 3506 0x00 0x00 NOPX
+ 3508 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0
+ 3512 0x00 0x00 NOPX
+ 3514 0x00 0x00 NOPX
+ 3516 0x00 0x00 NOPX
+ 3518 0x00 0x00 NOPX
+ 3520 0x00 0x00 NOPX
+ 3522 0x00 0x00 NOPX
+ 3524 0x0c 0x06 0x31 0x98 ST r17, [p4]
+ 3528 0x82 0xd2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r20, [p4, #4]; NOPM
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 0x18 0x80 0x40 0xb8 MOV dj0, #32
+ 3540 0x60 0x7a 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3552 0x03 0x08 0x80 0xc0 0x1e 0x14 MOVA m2, #24; ADD.NC p0, r0, #30
+ 3558 0x43 0x8a 0xd0 0x00 0x02 0x08 0x07 0xe2 0x58 0xba LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30
+ 3568 0x40 0x8e 0x52 0x00 0x99 0x54 LDA.s16 r3, [p2]; MOV m1, #38
+ 3574 0x02 0x14 0x36 0x98 LDA r1, [p2, #4]
+ 3578 0x00 0x00 NOPX
+ 3580 0x00 0x00 NOPX
+ 3582 0x00 0x2f 0xf7 0x18 ST.s16 r31, [p0], #4
+ 3586 0x00 0x00 NOPX
+ 3588 0x00 0x00 NOPX
+ 3590 0x00 0x00 NOPX
+ 3592 0x00 0x00 NOPX
+ 3594 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 0x00 0x5f 0x17 0x18 ST.s16 r24, [p0], #10
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 0x10 0x30 0x01 0x18 MOVX r24, #0
+ 3614 0x00 0x00 NOPX
+ 3616 0x00 0xcf 0x17 0x18 ST.s16 r24, [p0], #-8
+ 3620 0x00 0x48 0x9a 0x98 LDA.u16 r4, [p0], m2
+ 3624 0x00 0x00 NOPX
+ 3626 0x00 0x00 NOPX
+ 3628 0x00 0x00 NOPX
+ 3630 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 0x00 0xfc 0x17 0x18 ST.s16 r0, [p0], #-2
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 0x21 0x35 0xb2 0xa4 0xff 0x24 LSHL r4, r4, r26; ADD.NC r5, r4, #-1
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 0x10 0x00 0x41 0x98 SUB r0, r0, r4
+ 3648 0x00 0x00 NOPX
+ 3650 0x00 0x00 NOPX
+ 3652 0x00 0x00 NOPX
+ 3654 0x00 0x00 NOPX
+ 3656 0x00 0x08 0xb7 0x18 ST.s16 r5, [p0], m0
+ 3660 0x00 0x00 NOPX
+ 3662 0x00 0x00 NOPX
+ 3664 0x00 0x00 NOPX
+ 3666 0x00 0x00 NOPX
+ 3668 0x00 0x00 NOPX
+ 3670 0x00 0x00 NOPX
+ 3672 0x00 0x2a 0x77 0x18 ST.s16 r19, [p0], m1
+ 3676 0x00 0x00 NOPX
+ 3678 0x00 0x00 NOPX
+ 3680 0x00 0x00 NOPX
+ 3682 0x00 0x00 NOPX
+ 3684 0x00 0x00 NOPX
+ 3686 0x00 0x00 NOPX
+ 3688 0x00 0xec 0x47 0x18 ST.s8 r2, [p0], #-2
+ 3692 0x00 0x00 NOPX
+ 3694 0x00 0x00 NOPX
+ 3696 0x00 0x00 NOPX
+ 3698 0x00 0x00 NOPX
+ 3700 0x00 0x00 NOPX
+ 3702 0x00 0x00 NOPX
+ 3704 0x00 0x04 0x77 0x18 ST.s16 r3, [p0]
+ 3708 0x00 0x00 NOPX
+ 3710 0x00 0x00 NOPX
+ 3712 0x00 0x00 NOPX
+ 3714 0x00 0x00 NOPX
+ 3716 0x00 0x00 NOPX
+ 3718 0x00 0x00 NOPX
+ 3720 0x00 0xe4 0x27 0x18 ST.s8 r1, [p0, #-2]
+ 3724 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 3728 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 3734 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3736 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3738 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3740 0x00 0x01 0x67 0x98 NOPA
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+ 3744 0x83 0xd6 0xe0 0x00 0x00 0x3c 0xaf 0xf4 0x10 0xba ST.s16 r21, [p4], #2; MOVXM r5, #65512
+ 3754 0xff 0x8a 0x20 0x0a 0x7d 0x04 0x07 0xec 0x58 0xba LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20
+ 3764 0x00 0x9a 0x00 0x00 0x00 0x3c 0xcf 0xff 0x90 0xba MOVA r26, #4; MOVXM r6, #65535
+ 3774 0x10 0xe2 0x60 0x98 ADD r17, r3, r6
+ 3778 0x14 0x7a 0x46 0x18 MAC r29, r29, r17, r4
+ 3782 0x14 0x6a 0x4e 0x18 MSC r21, r21, r17, r4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 0x11 0xc4 0x2f 0x98 MUL r2, r7, r2
+ 3796 0x00 0x00 NOPX
+ 3798 0x00 0x00 NOPX
+ 3800 0x00 0x00 NOPX
+ 3802 0x00 0x00 NOPX
+ 3804 0x00 0x00 NOPX
+ 3806 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 3810 0x00 0x00 NOPX
+ 3812 0x00 0x00 NOPX
+ 3814 0x00 0x00 NOPX
+ 3816 0x00 0x00 NOPX
+ 3818 0x00 0x00 NOPX
+ 3820 0x00 0x00 NOPX
+ 3822 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2
+ 3826 0x00 0x00 NOPX
+ 3828 0x00 0x00 NOPX
+ 3830 0x00 0x00 NOPX
+ 3832 0x00 0x00 NOPX
+ 3834 0x00 0x00 NOPX
+ 3836 0x00 0x00 NOPX
+ 3838 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3842 0x00 0x00 NOPX
+ 3844 0x00 0x00 NOPX
+ 3846 0x00 0x00 NOPX
+ 3848 0x00 0x00 NOPX
+ 3850 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 0x11 0x45 0xd1 0x98 SUB r2, r5, r29
+.delay_slot
+ 3876 0x19 0xa1 0x1c 0xf8 MOV r6, eh0
+.delay_slot
+ 3880 0x80 0x8e 0x30 0x00 0x01 0xa5 0x70 0x02 ST r3, [p4]; NOPM
+.delay_slot
+ 3888 0x00 0x2c 0xf0 0x00 0x24 0x16 0x11 0xbd 0xe3 0x7c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+ 3904 0x83 0x92 0xe0 0x00 0x42 0x08 0x07 0xec 0x58 0xba ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20
+ 3914 0xff 0x86 0x20 0x06 0x2d 0x70 0x48 0x08 0x58 0xba LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8
+ 3924 0x00 0xc8 0x2d 0x20 0x11 0x64 MOVX r3, #16; MOV r26, #4
+ 3930 0x00 0x00 NOPX
+ 3932 0x00 0x00 NOPX
+ 3934 0x00 0x00 NOPX
+ 3936 0x00 0x00 NOPX
+ 3938 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3942 0x00 0x00 NOPX
+ 3944 0x00 0x00 NOPX
+ 3946 0x00 0x00 NOPX
+ 3948 0x00 0x00 NOPX
+ 3950 0x00 0x00 NOPX
+ 3952 0x00 0x00 NOPX
+ 3954 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3958 0x00 0x00 NOPX
+ 3960 0x00 0x00 NOPX
+ 3962 0x00 0x00 NOPX
+ 3964 0x00 0x00 NOPX
+ 3966 0x00 0x00 NOPX
+ 3968 0x00 0x00 NOPX
+ 3970 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3974 0x00 0x00 NOPX
+ 3976 0x00 0x00 NOPX
+ 3978 0x00 0x00 NOPX
+ 3980 0x00 0x00 NOPX
+ 3982 0x00 0x00 NOPX
+ 3984 0x00 0x00 NOPX
+ 3986 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 3990 0x00 0x00 NOPX
+ 3992 0x00 0x00 NOPX
+ 3994 0x00 0x00 NOPX
+ 3996 0x00 0x00 NOPX
+ 3998 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 0x04 0x08 0x77 0x18 ST.s16 r3, [p4], m0
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 0x10 0x46 0x26 0x18 MAC r3, r3, r1, r2
+.delay_slot
+.swstall delay_slot
+ 4020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4022 0x00 0x00 NOPX
+.delay_slot
+ 4024 0x0c 0x04 0xd1 0x98 ST r6, [p4]
+.delay_slot
+ 4028 0x0c 0x16 0x51 0x98 ST r18, [p4, #4]
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+ 4032 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2
+ 4036 0x00 0x00 NOPX
+ 4038 0x00 0x00 NOPX
+ 4040 0x00 0x00 NOPX
+ 4042 0x00 0x00 NOPX
+ 4044 0x00 0x00 NOPX
+ 4046 0x00 0x00 NOPX
+ 4048 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2
+ 4052 0x00 0x00 NOPX
+ 4054 0x00 0x00 NOPX
+ 4056 0x00 0x00 NOPX
+ 4058 0x00 0x00 NOPX
+ 4060 0x00 0x00 NOPX
+ 4062 0x00 0x00 NOPX
+ 4064 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 4068 0x00 0x00 NOPX
+ 4070 0x00 0x00 NOPX
+ 4072 0x00 0x00 NOPX
+ 4074 0x07 0xfc 0x71 0x18 LDA r3, [sp, #-4]
+ 4078 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 0x00 0x00 0xf0 0xbf 0xc0 0x44 MOVXM r1, #65504
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 0x10 0x45 0xa0 0x98 ADD r2, r1, r26
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 0x18 0x44 0xc0 0xa0 0x41 0x64 MAC r1, r1, r3, r2; MOV r1, #16
+ 4104 0x00 0x00 NOPX
+ 4106 0x00 0x00 NOPX
+ 4108 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 4112 0x00 0x00 NOPX
+ 4114 0x00 0x00 NOPX
+ 4116 0x00 0x00 NOPX
+ 4118 0x00 0x00 NOPX
+ 4120 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 0x18 0x0f 0xd8 0xb8 MOV m0, #-20
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 0x04 0x08 0xb7 0x18 ST.s16 r5, [p4], m0
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 0xb1 0x49 0xc2 0xa0 0x41 0x64 MSC r5, r5, r22, r4; MOV r5, #16
+.delay_slot
+.swstall delay_slot
+ 4146 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4148 0x00 0x00 NOPX
+.delay_slot
+ 4150 0x0c 0x06 0x91 0x98 ST r20, [p4]
+.delay_slot
+ 4154 0x82 0xc6 0x30 0x01 0xa0 0x8b 0xd0 0x8e 0x79 0x3a ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+
+.text_segment PM 4176
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function_start
+ 4176 0x42 0x82 0xd0 0x3e 0x47 0xc8 0x87 0xe8 0x58 0xba LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24
+ 4186 0x45 0x86 0xd0 0x3e 0x27 0xaa 0x08 0x06 0x58 0xba LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6
+ 4196 0x4f 0x96 0xd0 0x01 0x80 0x08 0x68 0x60 0x78 0xba LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0
+ 4206 0x02 0x2a 0x52 0x98 LDA.s16 r18, [p2], m1
+ 4210 0x02 0x1c 0xd6 0x98 LDA r6, [p2], #4
+ 4214 0x02 0x2c 0xf6 0x98 LDA r7, [p2], #8
+ 4218 0x02 0x06 0x36 0x98 LDA r17, [p2]
+ 4222 0x10 0x26 0x4e 0x98 ASHL r19, r0, r4
+ 4226 0x02 0x24 0x96 0x98 LDA r4, [p2, #8]
+ 4230 0x11 0x68 0x2e 0x98 ASHL r20, r5, r2
+ 4234 0x18 0x49 0x72 0xf8 VBCST.16 x0, r18
+ 4238 0x00 0x00 NOPX
+ 4240 0x14 0xe5 0x4f 0x98 MUL r18, r19, r20
+ 4244 0x10 0x67 0x11 0x98 SUB r19, r1, r17
+ 4248 0x14 0xe7 0x2f 0x98 MUL r19, r19, r18
+ 4252 0x14 0x63 0x2f 0x98 MUL r17, r17, r18
+ 4256 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+ 4260 0xc4 0x23 0x34 0xc3 0x82 0xa4 GE r16, r24, r17; ADD.NC p2, r3, r16
+ 4266 0x80 0x08 0xa0 0x40 0x01 0x84 JNZ r16, #4416
+.delay_slot
+ 4272 0x18 0x00 0x92 0xf8 VMOV bmll0, x0
+.delay_slot
+.swstall delay_slot
+ 4276 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4282 0x00 0x00 NOPX
+ 4284 0x00 0x00 0x11 0xe2 0x60 0x44 MOVXM ls, #4400
+ 4290 0x00 0x00 0x16 0xe2 0x60 0x44 MOVXM le, #4400
+ 4296 0x00 0x2b 0x60 0x02 0xbc 0x50 0x70 0x02 NOPS; MOV lc, r17
+ 4304 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4400 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.loop_nesting 0
+ 4416 0x00 0x86 0x00 0x0b 0x00 0xfe 0x29 0xcc 0xa8 0xba MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6
+ 4426 0x04 0x62 0x32 0x87 0xff 0x24 SUB r17, r0, r17; ADD.NC dn1, r7, #-1
+ 4432 0x14 0x62 0x6d 0x98 LSHL r17, r17, r6
+ 4436 0x04 0x4e 0x32 0x11 0x10 0x24 SUB r17, r0, r7; ADD.NC m1, r17, #16
+ 4442 0x11 0xe1 0x0f 0x98 MUL r16, r7, r16
+ 4446 0x14 0x4c 0x6d 0x98 LSHL r6, r17, r6
+ 4450 0x81 0x85 0xd4 0xc3 0x32 0xa4 ASHL r6, r16, r2; ADD.NC p2, r3, r6
+ 4456 0x16 0x0e 0x69 0x98 GE r7, r24, r6
+ 4460 0x38 0x09 0x08 0x40 0x01 0x84 JNZ r7, #4624
+.delay_slot
+.swstall delay_slot
+ 4466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4474 0x00 0x00 NOPX
+ 4476 0x00 0x07 0x80 0x00 0x00 0x04 0x79 0x00 0x10 0xba MOVA dc1, #0; MOVXM ls, #4608
+ 4486 0x02 0x06 0x80 0x00 0x00 0x05 0xb9 0x00 0x10 0xba MOVA dj1, #16; MOVXM le, #4608
+ 4496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0x90 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV
+ 4512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4592 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4608 0x00 0x2c 0xf0 0x00 0x22 0x30 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.loop_nesting 0
+ 4624 0x7f 0xff 0xf3 0xbf 0xf0 0x44 MOVXM r7, #2147483640
+ 4630 0x11 0xce 0x44 0x98 AND r7, r7, r4
+ 4634 0x11 0x4e 0x71 0x98 SUB r7, r5, r7
+ 4638 0x11 0xce 0x0f 0x98 MUL r7, r7, r0
+ 4642 0x11 0x04 0x2e 0x98 ASHL r2, r4, r2
+ 4646 0x11 0x48 0x41 0x98 SUB r4, r5, r4
+ 4650 0x10 0x84 0x0f 0x98 MUL r2, r2, r0
+ 4654 0x11 0x00 0x0f 0x98 MUL r0, r4, r0
+ 4658 0x08 0x45 0xf3 0x20 0x05 0x64 MUL r1, r1, r2; MOV r6, #1
+ 4664 0x10 0x00 0x6d 0x98 LSHL r0, r0, r6
+ 4668 0xc0 0x03 0x34 0xc3 0x02 0xa4 GE r0, r24, r1; ADD.NC p2, r3, r0
+ 4674 0x00 0x09 0x70 0x40 0x01 0x84 JNZ r0, #4832
+.delay_slot
+.swstall delay_slot
+ 4680 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4682 0x00 0x00 NOPX
+.delay_slot
+ 4684 0x11 0xc8 0x6d 0x98 LSHL r4, r7, r6
+.delay_slot
+ 4688 0x18 0x02 0x08 0x18 ADD.NC m0, r4, #16
+.delay_slot
+ 4692 0x18 0x41 0x7f 0x98 ADD.NC dn0, r2, #-1
+ 4696 0x00 0x03 0x80 0x00 0x00 0x04 0x79 0x68 0x10 0xba MOVA dc0, #0; MOVXM ls, #4816
+ 4706 0x02 0x02 0x80 0x00 0x00 0x05 0xb9 0x68 0x10 0xba MOVA dj0, #16; MOVXM le, #4816
+ 4716 0x1d 0x70 0xa0 0xf8 MOV lc, r1
+ 4720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4784 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4800 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4816 0x00 0x2c 0xf0 0x00 0x22 0x10 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.loop_nesting 0
+ 4832 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+ 4836 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4838 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4840 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4842 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4844 0x00 0x00 NOPX
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+
+.text_segment PM 4848
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function_start
+ 4848 0x18 0xd4 0xc0 0xf8 MOV r3, p2
+ 4852 0x6b 0x91 0x60 0x03 0xb0 0xcb 0x00 0x02 MOVS dn3, p7; ADD.NC p7, r3, #44
+ 4860 0x07 0x8c 0x1a 0x98 LDA.u16 r0, [p7], #-16
+ 4864 0x00 0x00 NOPX
+ 4866 0x00 0x00 NOPX
+ 4868 0x00 0x00 NOPX
+ 4870 0x00 0x00 NOPX
+ 4872 0x00 0x00 NOPX
+ 4874 0x00 0x00 NOPX
+ 4876 0x00 0x09 0xf0 0x40 0x01 0x84 JNZ r0, #5088
+.delay_slot
+ 4882 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 4886 0x18 0xc2 0x72 0xf8 VBCST.32 x1, r16
+.delay_slot
+.swstall delay_slot
+ 4890 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4892 0x00 0x00 NOPX
+.delay_slot
+ 4894 0x00 0x20 0x00 0x00 0x01 0xc4 PADDXM [sp], #256
+ 4900 0x1a 0x80 0x48 0xb8 MOV dj2, #36
+ 4904 0x02 0x40 0x36 0x98 LDA r1, [p2, dj2]
+ 4908 0x00 0x00 NOPX
+ 4910 0x00 0x00 NOPX
+ 4912 0x00 0x00 NOPX
+ 4914 0x00 0x00 NOPX
+ 4916 0x00 0x00 NOPX
+ 4918 0x00 0x00 NOPX
+ 4920 0x14 0x04 0x19 0x98 GE r2, r16, r1
+ 4924 0x10 0x09 0xf0 0x40 0x01 0x84 JNZ r2, #5088
+.delay_slot
+ 4930 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1
+.delay_slot
+.swstall delay_slot
+ 4934 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4940 0x00 0x00 NOPX
+ 4942 0x00 0x2c 0xf3 0x84 0x8b 0x00 0x00 0x04 0x79 0xe8 0x10 0x76 NOPA; MOVS p3, p1; MOVXM ls, #5072
+ 4954 0x00 0x00 0x16 0xe7 0xa0 0x44 MOVXM le, #5072
+ 4960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb8 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV
+ 4976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 5072 0x00 0x2c 0xf0 0x00 0x23 0x1d 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.loop_nesting 0
+ 5088 0x1c 0x00 0x40 0xb8 MOV m4, #32
+ 5092 0x07 0x8a 0x3a 0x98 LDA.u16 r17, [p7], m4
+ 5096 0xff 0xda 0x5a 0x1f 0x19 0x54 LDA.s16 r22, [p7], #-2; MOV m5, #-58
+ 5102 0xf5 0x6b 0x51 0x00 0xb9 0x54 LDA.u16 r26, [p7], m5; MOV dj0, #46
+ 5108 0xe0 0x52 0x59 0xbd 0x81 0xd4 LDA.s16 r20, [p7, dj0]; MOV r19, p7
+ 5114 0xe0 0x4e 0x56 0xd3 0x38 0x14 LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56
+ 5120 0x03 0xde 0xb2 0x98 LDA.s16 r21, [p3], #-6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 0x03 0xff 0x9a 0x98 LDA.u16 r28, [p3], #-2
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 0x67 0xc6 0x50 0x1c 0x12 0x2c LDA.s16 r17, [p3], #6; MOVX r7, #2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 0x7e 0xca 0x50 0x3f 0x27 0xca 0x60 0x00 0x58 0xba LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 0xe0 0x1e 0x52 0x10 0x4b 0x23 0x29 0x6c 0xc8 0x01 0x58 0x76 LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 0x60 0xef 0x52 0x5a 0x0b 0x2c 0x73 0xec 0x48 0x3c 0x58 0x76 LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 0x08 0x0a 0x83 0x84 0x8b 0x29 0x43 0x6d 0x01 0xd0 0x78 0x76 MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 0x6a 0x12 0xb0 0x27 0x33 0x6e 0x85 0x10 0x78 0xba VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 0x15 0x41 0x30 0x2b 0x33 0x6f 0x04 0xd0 0x78 0xba VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 0x8c 0x4d 0xba 0xf2 0xfe 0x24 LSHL r17, r17, r6; ADD.NC lc, r18, #-2
+ 5206 0x94 0x4d 0xb1 0x11 0x41 0xe4 LSHL r17, r18, r6; MOV dj0, r17
+ 5212 0x19 0x01 0x30 0x10 0x4b 0x0e 0x63 0x6c 0x04 0xd0 0x78 0x76 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19
+ 5224 0x0b 0x81 0x67 0x03 0x20 0xe4 0x14 0x30 0x3d 0x4a MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2
+ 5234 0x1d 0x21 0x34 0x5b 0x0b 0x02 0x44 0x50 0x72 0xba VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17
+ 5244 0x03 0x31 0x33 0x93 0x01 0xd4 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 0x15 0x41 0x30 0x04 0x11 0x80 0x3d 0x62 VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 0x03 0x50 0x95 0x98 VLDA.2D bmll1, [p3], d2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 0x00 0x00 0x00 0x8f 0x4c 0x02 0x10 0x28 0x3d 0x5a MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 0x19 0x01 0x37 0x10 0x01 0xd4 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 0x08 0x06 0x80 0x00 0x24 0x84 0x8b 0x00 0x44 0x08 0x82 0x00 0x78 0xa1 0x81 0xeb MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 0x1d 0x21 0x30 0x00 0x21 0x5a 0x0b 0x00 0x00 0x05 0xba 0x90 0x10 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5312 0x00 0x19 0x89 0x98 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 0x6a 0x12 0xb0 0x00 0x20 0x00 0xad 0x8e 0x11 0x80 0x3d 0x66 VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 0x15 0x41 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x81 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 0x19 0x01 0x30 0x00 0x24 0x31 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0xa1 0x81 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 0x1d 0x21 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5424 0xec 0x07 0x50 0x00 0x00 0x0c 0xaf 0xc0 0x10 0xba LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 0x03 0x31 0x32 0x15 0x72 0xe2 0x11 0x80 0x3d 0x4a VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 0x18 0x41 0x72 0xf8 VBCST.16 x0, r16
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 0x10 0x28 0x3d 0x48 VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1
+ 5458 0x00 0x00 NOPX
+ 5460 0x00 0x02 0x5f 0xf9 0x12 0x0c 0x3d 0x62 ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2
+ 5468 0x11 0x40 0x08 0x98 NE r0, r5, r0
+ 5472 0x00 0x0c 0x70 0x40 0x01 0x84 JNZ r0, #6368
+.delay_slot
+.swstall delay_slot
+ 5478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5482 0x00 0x00 NOPX
+.delay_slot
+ 5484 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1
+.delay_slot
+.swstall delay_slot
+ 5488 0x00 0x00 NOPX
+ 5490 0x46 0x9a 0xd0 0x14 0x1a 0x2c LDA r6, [p2, #12]; MOVX r5, #3
+ 5496 0x00 0x00 NOPX
+ 5498 0x00 0x00 NOPX
+ 5500 0x00 0x00 NOPX
+ 5502 0x00 0x00 NOPX
+ 5504 0x00 0x00 NOPX
+ 5506 0x00 0x00 NOPX
+ 5508 0x11 0x4e 0x69 0x98 GE r7, r5, r6
+ 5512 0x38 0x0e 0x40 0x40 0x01 0x84 JNZ r7, #7296
+.delay_slot
+ 5518 0x10 0x00 0x11 0x18 MOVX r0, #4
+.delay_slot
+.swstall delay_slot
+ 5522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5526 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5528 0x00 0x00 NOPX
+ 5530 0x11 0x8a 0x08 0x98 NE r5, r6, r0
+ 5534 0x28 0x0c 0xb8 0x40 0x01 0x84 JNZ r5, #6512
+.delay_slot
+.swstall delay_slot
+ 5540 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5542 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5544 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5548 0x00 0x00 NOPX
+ 5550 0x24 0x40 0xa9 0x83 0xc1 0xe4 MOVX r17, #257; MOV dc4, lr
+ 5556 0x00 0x00 0xfa 0xbf 0xfe 0x44 MOVXM r21, #65535
+ 5562 0x00 0x2c 0xf0 0x50 0x02 0x2c NOPA; MOVX r20, #0
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 5568 0x08 0x0a 0x82 0x83 0x0b 0x00 0x52 0x08 0x48 0x3c 0x58 0x76 MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60
+ 5580 0x48 0x1a 0x50 0x11 0x02 0x2c LDA.s16 r6, [p2, dj2]; MOVX r4, #32
+ 5586 0x00 0x00 NOPX
+ 5588 0x00 0x00 NOPX
+ 5590 0x00 0x00 NOPX
+ 5592 0x00 0x00 NOPX
+ 5594 0x00 0x00 NOPX
+ 5596 0x00 0x01 0x67 0x98 NOPA
+ 5600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0c 0x52 0xf4 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 5616 0x04 0x8a 0x87 0xfd 0xa5 0x80 0x01 0xf3 0xb2 0x78 0x10 0x76 MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168
+ 5628 0xe0 0xdc 0x57 0xfa 0x65 0x80 0x50 0x08 0x8b 0x39 0x78 0x76 LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5
+ 5640 0x48 0x1a 0xd7 0x84 0x8b 0x3f 0x67 0xe8 0x02 0x49 0x78 0x76 LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2
+ 5652 0x03 0xf8 0x00 0x02 0xd2 0x01 0x02 0x49 0x78 0xba MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2
+ 5662 0x02 0x19 0x00 0x00 0x00 0x04 0x7b 0x40 0x10 0xba MOVA r25, #16; MOVXM ls, #5760
+ 5672 0xff 0x94 0xb0 0x00 0x00 0x05 0xbc 0x60 0x10 0xba VLDA wl2, [sp, #-32]; MOVXM le, #6336
+ 5682 0x10 0x74 0x01 0x18 MOVX r26, #64
+ 5686 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 0x15 0xfa 0x80 0x18 MOVX crRnd, r23
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 0x08 0x02 0xc0 0x02 0xb9 0x80 0x00 0x02 VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 0x19 0xa0 0x92 0xf8 VMOV x3, x0
+ 5704 0x02 0xa6 0x92 0xe6 0x10 0x40 0x83 0x62 VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2
+ 5712 0x1b 0x2a 0x92 0xf8 VMOV x6, x5
+ 5716 0x00 0x00 NOPX
+ 5718 0x00 0x00 NOPX
+ 5720 0x00 0x00 NOPX
+ 5722 0x00 0x00 NOPX
+ 5724 0x09 0xc0 0x16 0x18 VCONV.bf16.fp32 wl3, bmll0
+ 5728 0x00 0x00 NOPX
+ 5730 0x10 0x06 0x83 0x48 VMSC.f dm0, dm0, x3, x4, r2
+ 5734 0x00 0x00 NOPX
+ 5736 0x00 0x00 NOPX
+ 5738 0x00 0x00 NOPX
+ 5740 0x00 0x00 NOPX
+ 5742 0x00 0x00 NOPX
+ 5744 0x00 0x2c 0xf0 0x00 0x22 0xc0 0x16 0x00 0x71 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.loop_nesting 1
+.begin_of_loop
+ 5760 0x23 0xbe 0x89 0xa5 0x25 0xf4 VLDB x7, [p1], #64; VMOV bmhh4, x9
+ 5766 0x1b 0xd6 0x92 0xf8 VMOV bmhh3, x11
+ 5770 0x1f 0x1e 0xc0 0xf8 MOV r28, p7
+ 5774 0x17 0x3b 0x84 0x98 AND r29, r28, r24
+ 5778 0xee 0xc9 0x5e 0x3d 0xe0 0x24 LT r27, r29, r4; ADD.NC r28, r29, #-32
+ 5784 0x15 0xbd 0xdd 0x98 LSHL r30, r22, r29
+ 5788 0x16 0xbf 0xd1 0x98 SUB r31, r26, r29
+ 5792 0x2f 0xbc 0x48 0x70 0xcd 0xa4 SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25
+ 5798 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+ 5802 0x1c 0x4e 0x22 0xf8 VMOV wl8, wh7
+ 5806 0x1d 0x4f 0x22 0xf8 VMOV wl10, wl7
+ 5810 0x1c 0x90 0x92 0xf8 VMOV bmhl4, x8
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 0x1b 0x94 0x92 0xf8 VMOV bmhl3, x10
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 0x02 0x12 0x8a 0xe6 0x13 0x28 0x3d 0x62 VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 0x19 0x0e 0x8a 0xf8 VMOV cml1, cmh3
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 0x04 0x4e 0x22 0xe6 0x12 0x50 0x3d 0x62 VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 0x1a 0x0e 0x92 0xf8 VMOV bmll2, x7
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 0x1c 0xc0 0x66 0xd8 VSHIFT x9, x8, x0, r25
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 0x01 0x10 0x92 0xe6 0x14 0x30 0x3d 0x62 VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 0x1c 0x12 0x92 0xf8 VMOV bmll4, x9
+ 5858 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3
+ 5862 0x1c 0xd1 0x22 0xf8 VMOV wl9, wl8
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 0x1c 0x48 0x66 0xd8 VSHIFT x8, x9, x0, r25
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 0x01 0x10 0x92 0xe6 0x11 0x64 0x3d 0x62 VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 0x1b 0x12 0x92 0xf8 VMOV bmll3, x9
+ 5882 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 0x1d 0x40 0x1e 0xd8 VSHIFT x10, x8, x0, r7
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 0x04 0x30 0x12 0xe6 0x12 0x4c 0x3d 0x62 VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 0x1b 0x14 0x92 0xf8 VMOV bmll3, x10
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 0x04 0x40 0x1e 0xc6 0x13 0x8c 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 0x1b 0x10 0x92 0xf8 VMOV bmll3, x8
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 0x1c 0x24 0x12 0xf8 VMOV x8, bmll1
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 0x04 0x40 0x1e 0xc6 0x11 0x30 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 0x04 0x40 0x02 0xc6 0x12 0x50 0x3d 0x62 VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+ 5946 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 0x1c 0x40 0x02 0xd8 VSHIFT x8, x8, x0, r0
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 0x05 0x24 0x12 0xe6 0x13 0x70 0x3d 0x62 VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 0x05 0x50 0x02 0xc6 0x10 0x30 0x3d 0x62 VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 0x1c 0x14 0x92 0xf8 VMOV bmll4, x10
+ 5978 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+ 5982 0x1d 0xe2 0x01 0xb8 VEXTRACT.32 r23, x8, #0, vaddSign0
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 0x1d 0x2c 0x12 0xf8 VMOV x10, bmll3
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 0xe2 0xd0 0x83 0x54 0x03 0x74 VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 0x1d 0xa0 0x12 0xf8 VMOV x11, bmll0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 0xe0 0xd4 0x8a 0xb4 0x06 0xb4 VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 0x1c 0xd4 0xa0 0x38 VSEL.32 x9, x10, x9, r20
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 0x1d 0x10 0xd1 0x78 VINSERT.32 x10, x2, #0, r6
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 0x1c 0x12 0xf1 0x78 VINSERT.32 x8, x2, #0, r23
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 0x1d 0xd3 0x22 0xf8 VMOV wl11, wl9
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 0x1d 0x93 0x22 0xf8 VMOV wh11, wl9
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 0x1c 0x15 0x22 0xf8 VMOV wh8, wl10
+ 6030 0x1c 0x5c 0x00 0x38 VSEL.32 x8, x11, x8, r16
+ 6034 0x1c 0x0c 0x08 0x38 VSEL.32 x8, x1, x8, r17
+ 6038 0x1b 0xc3 0xa8 0x38 VSEL.32 x7, x8, x7, r21
+ 6042 0x18 0x0e 0x92 0xf8 VMOV bmll0, x7
+ 6046 0x1c 0xac 0x92 0xf8 VMOV x9, x6
+ 6050 0x68 0x02 0xc0 0x01 0x07 0x49 0x70 0x02 VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7
+ 6058 0x1c 0x32 0x92 0xf8 VMOV x8, x9
+ 6062 0x05 0xbb 0xcd 0xed 0xea 0x0f 0x12 0x4c 0x83 0x5a LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 0x00 0x0b 0x3e 0x91 0x11 0xec 0xa1 0x62 SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 0x05 0xa5 0xe2 0x33 0x09 0x2f 0x10 0xec 0x61 0x5a SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 0x13 0xec 0x01 0x48 VMUL.f dm3, x6, x0, r2
+ 6094 0x00 0x00 NOPX
+ 6096 0x00 0x00 NOPX
+ 6098 0x0c 0xc1 0x16 0x18 VCONV.bf16.fp32 wl9, bmll2
+ 6102 0x00 0x00 NOPX
+ 6104 0x12 0x52 0x83 0x48 VMSC.f dm2, dm2, x9, x4, r2
+ 6108 0x00 0x00 NOPX
+ 6110 0x00 0x00 NOPX
+ 6112 0x00 0x00 NOPX
+ 6114 0x00 0x00 NOPX
+ 6116 0x00 0x00 NOPX
+ 6118 0x0c 0x41 0x16 0x18 VCONV.bf16.fp32 wl8, bmll2
+ 6122 0x00 0x00 NOPX
+ 6124 0x14 0xf0 0xa1 0x48 VMUL.f dm4, x8, x5, r2
+ 6128 0x12 0xf0 0x61 0x48 VMUL.f dm2, x8, x3, r2
+ 6132 0x00 0x00 NOPX
+ 6134 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 0x12 0xf2 0xa1 0x48 VMUL.f dm2, x9, x5, r2
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0
+ 6154 0x00 0x00 NOPX
+ 6156 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 0x12 0xe1 0x01 0x48 VMUL.f dm2, x0, x8, r2
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0
+ 6176 0x00 0x00 NOPX
+ 6178 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 0x11 0xf2 0x61 0x48 VMUL.f dm1, x9, x3, r2
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0
+ 6198 0x00 0x00 NOPX
+ 6200 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 0x11 0xf2 0x01 0x48 VMUL.f dm1, x9, x0, r2
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0
+ 6220 0x00 0x00 NOPX
+ 6222 0x00 0x00 NOPX
+ 6224 0x00 0x00 NOPX
+ 6226 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 0x1d 0x70 0x12 0xf8 VMOV lfl1, bmll4
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 0x12 0x88 0x3d 0x48 VADD.f dm2, dm4, dm2, r2
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 0x1c 0x15 0x92 0xf8 VMOV bmll4, lfl1
+ 6240 0x00 0x00 NOPX
+ 6242 0x00 0x00 NOPX
+ 6244 0x00 0x00 NOPX
+ 6246 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 0x1c 0x68 0x12 0xf8 VMOV lfh1, bmll2
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 0x12 0x44 0x3d 0x48 VADD.f dm2, dm2, dm1, r2
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 0x1a 0x11 0x92 0xf8 VMOV bmll2, lfh1
+ 6260 0x00 0x00 NOPX
+ 6262 0x00 0x00 NOPX
+ 6264 0x00 0x00 NOPX
+ 6266 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 0x1d 0x68 0x12 0xf8 VMOV lfl1, bmll2
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 0x10 0x20 0x3d 0x48 VADD.f dm0, dm1, dm0, r2
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 0x19 0x15 0x92 0xf8 VMOV bmll1, lfl1
+ 6280 0x00 0x00 NOPX
+ 6282 0x00 0x00 NOPX
+ 6284 0x00 0x00 NOPX
+ 6286 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 0x1c 0x60 0x12 0xf8 VMOV lfh1, bmll0
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 0x10 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r2
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 0x18 0x11 0x92 0xf8 VMOV bmll0, lfh1
+ 6300 0x00 0x00 NOPX
+ 6302 0x00 0x00 NOPX
+ 6304 0x00 0x00 NOPX
+ 6306 0x00 0x00 NOPX
+ 6308 0x0d 0xc0 0x16 0x18 VCONV.bf16.fp32 wl11, bmll0
+ 6312 0x00 0x00 NOPX
+ 6314 0x1d 0x85 0xfe 0xd8 VSHIFT x11, x0, x11, r31
+ 6318 0x1d 0xd5 0xcc 0x38 VSEL.8 x11, x10, x11, r19:r18
+ 6322 0x00 0x00 NOPX
+ 6324 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x8b 0x65 0x41 0x36 NOPA; NOPB; VST wh11, [p7, #32]; NOPX
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.end_of_loop
+ 6336 0x00 0x2c 0xf0 0x00 0x27 0x8a 0xea 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV
+.loop_nesting 0
+ 6352 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384
+.delay_slot
+.swstall delay_slot
+ 6358 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6360 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6362 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6364 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6366 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 0xff 0xb4 0xb0 0xb4 0x80 0x5c ST dn3, [sp, #-4]; MOVX vaddSign0, #1
+ 6374 0x00 0x2c 0xf7 0xf8 0x3d 0x80 0x00 0x00 0x00 0x7a NOPA; ST lr, [sp, #-8]; NOPX
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6384 0x1f 0x61 0x91 0x18 ADD.NC p7, r3, #34
+ 6388 0xe0 0x8f 0x5b 0x64 0xc1 0xd4 LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 0x18 0x7b 0x60 0xf8 MOV crSCDEn, crMCDEn
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 0x07 0x04 0x77 0x18 ST.s16 r3, [p7]
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 0x10 0xc6 0x07 0x18 ADD r3, r3, #1
+.delay_slot
+ 6418 0x00 0x2c 0xf0 0x00 0x20 0xc0 0xb0 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM
+.return_address
+ 6432 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8]
+ 6436 0x07 0xfc 0x99 0x18 LDA p1, [sp, #-4]
+ 6440 0x07 0x54 0x77 0x18 ST.s16 r3, [p7, #10]
+ 6444 0xff 0xe0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-256
+ 6450 0x00 0x00 NOPX
+ 6452 0x00 0x00 NOPX
+ 6454 0x00 0x00 NOPX
+ 6456 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 6460 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.delay_slot
+.swstall delay_slot
+ 6464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6470 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6480 0x1f 0xf4 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r20, #255; J #5568
+.delay_slot
+ 6490 0x10 0x2a 0x01 0x18 MOVX r21, #0
+.delay_slot
+.swstall delay_slot
+ 6494 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6500 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+ 6512 0x10 0x0a 0x15 0x18 MOVX r5, #5
+ 6516 0x11 0x4a 0x67 0x98 EQ r5, r5, r6
+ 6520 0x28 0x0e 0x30 0x40 0x01 0x84 JNZ r5, #7264
+.delay_slot
+.swstall delay_slot
+ 6526 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6534 0x00 0x00 NOPX
+ 6536 0x10 0x0e 0x19 0x18 MOVX r7, #6
+ 6540 0x11 0xce 0x67 0x98 EQ r7, r7, r6
+ 6544 0x38 0x0e 0xa8 0x40 0x01 0x84 JNZ r7, #7504
+.delay_slot
+ 6550 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.swstall delay_slot
+ 6554 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6556 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6558 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6576 0x48 0x1e 0x50 0x01 0x10 0xea 0x60 0xf0 0x78 0xba LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr
+ 6586 0x89 0x8d 0x18 0xa4 0x05 0x64 NE r6, r17, r6; MOV r17, #257
+ 6592 0x30 0x0e 0x20 0x40 0x01 0x84 JNZ r6, #7232
+.delay_slot
+.swstall delay_slot
+ 6598 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6600 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6602 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6604 0x00 0x00 NOPX
+.delay_slot
+ 6606 0x11 0xca 0x5e 0x98 ASHL r5, r7, r5
+ 6610 0x04 0x8a 0x80 0x84 0x8b 0x00 0x00 0x04 0x7d 0x08 0x10 0x76 MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672
+ 6622 0x48 0x1e 0xd7 0xfd 0xa5 0x80 0x00 0x05 0xbd 0x38 0x10 0x76 LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768
+ 6634 0x00 0x1d 0x15 0x98 VLDA bmll2, [p0], #64
+ 6638 0x00 0x00 NOPX
+ 6640 0x1c 0xc2 0x92 0xf8 VMOV bmhh4, x1
+ 6644 0x00 0x00 NOPX
+ 6646 0x03 0x13 0x12 0xe6 0x11 0x68 0x3d 0x62 VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2
+ 6654 0x00 0x00 NOPX
+ 6656 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0xff 0xc8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.loop_nesting 1
+.begin_of_loop
+ 6672 0x03 0xa2 0xb0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x62 0x09 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x8b 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x89 0x89 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+ 6784 0x02 0x10 0x00 0x00 0x01 0xf3 0xb2 0x78 0x10 0xba MOVA r16, #16; MOVXM p7, #509168
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 0xe0 0x90 0x50 0x00 0x61 0x08 0x98 0x01 0x58 0xba LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 0x10 0x22 0x05 0x18 MOVX r17, #1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 0x1c 0xc4 0x12 0xf8 VMOV bmhh4, bmll1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 0x19 0x33 0x12 0xf8 VMOV x2, bmhh4
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 0x19 0x10 0x12 0xd8 VSHIFT x2, x2, x0, r4
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 0x01 0x3a 0x80 0x00 0x49 0x2f 0x10 0x40 0x3d 0x5a MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 0x1a 0x13 0x12 0xf8 VMOV bmll2, bmhh4
+ 6838 0x19 0x16 0x72 0xf8 VBCST.32 x2, r5
+ 6842 0x19 0x04 0x92 0xf8 VMOV bmll1, x2
+ 6846 0x00 0x00 NOPX
+ 6848 0x1a 0x04 0x12 0xf8 VMOV bmll2, bmll1
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 0x19 0x20 0x12 0xf8 VMOV x2, bmll0
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 0x01 0x10 0x42 0xc6 0x10 0x0c 0x3d 0x62 VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 0x1b 0x04 0x92 0xf8 VMOV bmll3, x2
+ 6868 0x19 0x20 0x92 0xf8 VMOV x2, x0
+ 6872 0x00 0x00 NOPX
+ 6874 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1
+ 6878 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 0x01 0xa0 0x12 0xe6 0x14 0x40 0x83 0x62 VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 0x01 0x98 0x1a 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 0x1a 0x06 0x92 0xf8 VMOV bmll2, x3
+ 6900 0x19 0xa4 0x92 0xf8 VMOV x3, x2
+ 6904 0x00 0x00 NOPX
+ 6906 0x00 0x00 NOPX
+ 6908 0x09 0x42 0x16 0x18 VCONV.bf16.fp32 wl2, bmll4
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 0x1a 0xa0 0x12 0xf8 VMOV x5, bmll0
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 0x03 0x28 0x02 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 0x1a 0x0c 0x92 0xf8 VMOV bmll2, x6
+ 6928 0x1a 0xa6 0x92 0xf8 VMOV x5, x3
+ 6932 0x00 0x00 NOPX
+ 6934 0x00 0x00 NOPX
+ 6936 0x00 0x00 NOPX
+ 6938 0x1b 0x20 0x12 0xf8 VMOV x6, bmll0
+ 6942 0x18 0x1a 0x01 0xb8 VEXTRACT.32 r0, x6, #0, vaddSign0
+ 6946 0x00 0x00 NOPX
+ 6948 0x1b 0x00 0x11 0x78 VINSERT.32 x6, x0, #0, r0
+ 6952 0x18 0x8b 0x08 0x38 VSEL.32 x1, x1, x6, r17
+ 6956 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1
+ 6960 0x18 0xaa 0x92 0xf8 VMOV x1, x5
+ 6964 0x58 0x22 0xc0 0x01 0x91 0x49 0x70 0x02 VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1
+ 6972 0x00 0x00 NOPX
+ 6974 0x11 0x4a 0x83 0x48 VMSC.f dm1, dm2, x5, x4, r2
+ 6978 0x10 0xea 0x41 0x48 VMUL.f dm0, x5, x2, r2
+ 6982 0x00 0x00 NOPX
+ 6984 0x00 0x00 NOPX
+ 6986 0x00 0x00 NOPX
+ 6988 0x00 0x00 NOPX
+ 6990 0x08 0xc0 0x96 0x18 VCONV.bf16.fp32 wl1, bmll1
+ 6994 0x14 0x84 0x83 0x48 VMSC.f dm4, dm4, x2, x4, r2
+ 6998 0x13 0x22 0x83 0x48 VMSC.f dm3, dm1, x1, x4, r2
+ 7002 0x00 0x00 NOPX
+ 7004 0x00 0x00 NOPX
+ 7006 0x00 0x00 NOPX
+ 7008 0x00 0x00 NOPX
+ 7010 0x09 0xc2 0x16 0x18 VCONV.bf16.fp32 wl3, bmll4
+ 7014 0x0b 0x41 0x96 0x18 VCONV.bf16.fp32 wl6, bmll3
+ 7018 0x00 0x00 NOPX
+ 7020 0x12 0xec 0x61 0x48 VMUL.f dm2, x6, x3, r2
+ 7024 0x13 0xec 0x41 0x48 VMUL.f dm3, x6, x2, r2
+ 7028 0x00 0x00 NOPX
+ 7030 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 0x13 0xe2 0x61 0x48 VMUL.f dm3, x1, x3, r2
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7050 0x00 0x00 NOPX
+ 7052 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 0x13 0xea 0x61 0x48 VMUL.f dm3, x5, x3, r2
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7072 0x00 0x00 NOPX
+ 7074 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 0x13 0xe2 0x41 0x48 VMUL.f dm3, x1, x2, r2
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7094 0x00 0x00 NOPX
+ 7096 0x00 0x00 NOPX
+ 7098 0x00 0x00 NOPX
+ 7100 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7114 0x00 0x00 NOPX
+ 7116 0x00 0x00 NOPX
+ 7118 0x13 0xe0 0xc1 0x48 VMUL.f dm3, x0, x6, r2
+ 7122 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 0x13 0x4c 0x3d 0x48 VADD.f dm3, dm2, dm3, r2
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7136 0x00 0x00 NOPX
+ 7138 0x00 0x00 NOPX
+ 7140 0x11 0xe2 0x01 0x48 VMUL.f dm1, x1, x0, r2
+ 7144 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 0x18 0x6c 0x12 0xf8 VMOV lfh0, bmll3
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 0x11 0x44 0x3d 0x48 VADD.f dm1, dm2, dm1, r2
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7158 0x00 0x00 NOPX
+ 7160 0x00 0x00 NOPX
+ 7162 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 0x00 0x24 0x12 0xe6 0x10 0x40 0x3d 0x62 VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 0x02 0x00 0x92 0xe6 0x14 0xea 0x01 0x62 VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2
+ 7182 0x00 0x00 NOPX
+ 7184 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 0x10 0x50 0x3d 0x48 VADD.f dm0, dm2, dm4, r2
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0
+ 7196 0x00 0x00 NOPX
+ 7198 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384
+.delay_slot
+ 7204 0x0f 0xfa 0x65 0x98 ST dc4, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 7208 0x00 0x00 NOPX
+.delay_slot
+ 7210 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0
+.delay_slot
+.swstall delay_slot
+ 7214 0x00 0x00 NOPX
+.delay_slot
+ 7216 0x00 0x2c 0xf0 0x00 0x21 0x05 0x12 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+ 7232 0x00 0x14 0x00 0x00 0x02 0xbe 0x00 0x00 0x20 0xba MOVA r20, #0; J #5616
+.delay_slot
+ 7242 0x10 0x2a 0x01 0x18 MOVX r21, #0
+.delay_slot
+.swstall delay_slot
+ 7246 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7248 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7250 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7252 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 0x00 0x0e 0x90 0x00 0x00 0x84 J #7456
+.delay_slot
+ 7270 0xff 0x93 0xb0 0x02 0x60 0xf0 0x70 0x02 ST p1, [sp, #-4]; MOV dc4, lr
+.delay_slot
+.swstall delay_slot
+ 7278 0x00 0x00 NOPX
+.delay_slot
+ 7280 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256]
+.delay_slot
+ 7284 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192]
+.delay_slot
+ 7288 0xff 0x0e 0x60 0x00 0x01 0xa5 0x70 0x02 VST x1, [sp, #-128]; NOPM
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+ 7296 0x10 0x22 0x05 0x18 MOVX r17, #1
+ 7300 0x14 0x62 0x67 0x98 EQ r17, r17, r6
+ 7304 0x88 0x0e 0x90 0x40 0x01 0x84 JNZ r17, #7456
+.delay_slot
+.swstall delay_slot
+ 7310 0x00 0x00 NOPX
+.delay_slot
+ 7312 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256]
+.delay_slot
+ 7316 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192]
+.delay_slot
+ 7320 0x0f 0xf8 0x73 0x18 VST x1, [sp, #-128]
+.delay_slot
+ 7324 0xff 0x93 0xb0 0x00 0x70 0x4a 0x60 0xf0 0x79 0x3a ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr
+ 7334 0x11 0xce 0x67 0x98 EQ r7, r7, r6
+ 7338 0x38 0x0e 0x80 0x40 0x01 0x84 JNZ r7, #7424
+.delay_slot
+.swstall delay_slot
+ 7344 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7346 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7348 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7350 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7352 0x00 0x00 NOPX
+ 7354 0x11 0x4e 0x67 0x98 EQ r7, r5, r6
+ 7358 0x38 0x0e 0x70 0x40 0x01 0x84 JNZ r7, #7392
+.delay_slot
+ 7364 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.swstall delay_slot
+ 7368 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7370 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7372 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7374 0x00 0x00 NOPX
+ 7376 0x00 0x0c 0xd8 0x00 0x00 0x84 J #6576
+.delay_slot
+.swstall delay_slot
+ 7382 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7384 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7386 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7390 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+ 7392 0x20 0x31 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r17, #257; J #5568
+.delay_slot
+ 7402 0x05 0x40 0x28 0x00 0x41 0x64 MOVX r21, #0; MOV m4, #16
+.delay_slot
+ 7408 0x10 0x28 0x01 0x18 MOVX r20, #0
+.delay_slot
+.swstall delay_slot
+ 7412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7416 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480
+.delay_slot
+ 7430 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535
+.delay_slot
+ 7436 0x1c 0x00 0x20 0xb8 MOV m4, #16
+.delay_slot
+.swstall delay_slot
+ 7440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7444 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 7456 0xfe 0x07 0x70 0x00 0x02 0xb8 0x00 0x00 0x20 0xba VLDA x0, [sp, #-256]; J #5568
+.delay_slot
+ 7466 0xfe 0xa7 0x70 0x00 0x00 0x8a 0x88 0x00 0x58 0xba VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0
+.delay_slot
+ 7476 0xff 0x93 0x20 0x00 0x00 0x3e 0x0f 0xff 0x90 0xba LDA p1, [sp, #-4]; MOVXM r16, #65535
+.delay_slot
+ 7486 0x05 0x40 0x28 0x00 0x81 0x64 MOVX r21, #0; MOV m4, #32
+.delay_slot
+ 7492 0x11 0x22 0x05 0x18 MOVX r17, #257
+.delay_slot
+ 7496 0xff 0x0f 0x70 0x04 0x00 0x00 0x1c 0x22 VLDA x1, [sp, #-128]; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480
+.delay_slot
+ 7510 0x1c 0xc1 0xe0 0xf8 MOV dc4, lr
+.delay_slot
+ 7514 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535
+.delay_slot
+ 7520 0x1c 0x00 0x20 0xb8 MOV m4, #16
+.delay_slot
+.swstall delay_slot
+ 7524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7526 0x00 0x00 NOPX
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+
+.text_segment PM 7536
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 7536 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 0xfd 0xf3 0xb0 0x00 0x01 0xf3 0xb2 0x60 0x11 0x3a ST p7, [sp, #-20]; MOVXM p7, #509120
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 0xe0 0xc2 0xd7 0xe7 0x1d 0x82 0x0d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 0xff 0x2e 0xb0 0x21 0x04 0x81 0x68 0xf0 0x79 0x3a ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 0xfe 0x3e 0xb8 0x47 0xf6 0x5c ST r15, [sp, #-16]; ADD r17, r16, #-2
+ 7578 0x0f 0xe9 0xb5 0x98 ST r13, [sp, #-24]
+ 7582 0x00 0x00 NOPX
+ 7584 0x00 0x00 NOPX
+ 7586 0x00 0x00 NOPX
+ 7588 0x80 0x0f 0xf0 0x40 0x01 0x84 JNZ r16, #8160
+.delay_slot
+ 7594 0x0f 0xfd 0x95 0x98 ST r12, [sp, #-4]
+.delay_slot
+ 7598 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12]
+.delay_slot
+ 7602 0x0f 0xe0 0x1d 0x98 ST p0, [sp, #-32]
+.delay_slot
+ 7606 0x00 0x07 0xcc 0xc9 0x90 0x44 MOVXM p6, #509128
+.delay_slot
+ 7612 0x0e 0x06 0x31 0x98 ST r17, [p6]
+ 7616 0x00 0x31 0x07 0x88 0x8b 0x00 0x01 0xf1 0x32 0x76 0x10 0x76 MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 0x40 0xc6 0x30 0x00 0x01 0xf1 0x32 0x78 0x11 0x3a ST r17, [p2]; MOVXM p2, #509168
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 0x40 0xc0 0xec 0xc5 0x81 0xd4 ST.s8 r16, [p2]; MOV p6, p1
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 0x00 0x05 0x08 0x00 0x01 0x04 JL #2576
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 0x00 0x07 0xc0 0xc8 0x80 0x44 MOVXM p0, #508992
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+.swstall delay_slot
+ 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.return_address
+ 7680 0x00 0x11 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r17, #0; MOVXM p2, #508992
+ 7690 0x40 0xba 0xd0 0x00 0x01 0xf1 0x32 0x64 0x10 0xba LDA r14, [p2]; MOVXM p2, #509128
+ 7700 0x40 0xca 0xd0 0x00 0x01 0xf1 0x32 0x22 0x10 0xba LDA r18, [p2]; MOVXM p2, #508996
+ 7710 0x43 0xb6 0xd0 0x00 0x01 0xf1 0xb2 0x68 0x10 0xba LDA r13, [p2], #4; MOVXM p3, #509136
+ 7720 0x42 0x85 0xd0 0x00 0x01 0xf0 0xb2 0x66 0x10 0xba LDA el0, [p2, #4]; MOVXM p1, #509132
+ 7730 0x40 0xbe 0xd8 0x39 0x81 0xd4 LDA r15, [p2]; MOV r16, p6
+ 7736 0x1a 0x68 0x14 0x18 ADD.NC p2, r16, #40
+ 7740 0x00 0x07 0xcc 0xca 0x00 0x44 MOVXM p6, #509184
+ 7746 0x00 0x07 0xc0 0xc9 0xd0 0x44 MOVXM p0, #509160
+ 7752 0x13 0xa5 0x2f 0x98 MUL r18, r14, r18
+ 7756 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648
+ 7762 0x60 0x85 0x36 0xca 0x5f 0x5c ST el0, [p3]; MUL r18, r13, r18
+ 7768 0x00 0x00 NOPX
+ 7770 0x13 0xe5 0x2f 0x98 MUL r18, r15, r18
+ 7774 0x00 0x00 NOPX
+ 7776 0x09 0x06 0x51 0x98 ST r18, [p1]
+ 7780 0x02 0x4c 0x2e 0x98 LDA el0, [p2], #16
+ 7784 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7788 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7792 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7796 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7800 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7804 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7808 0x08 0x04 0x29 0x98 ST el0, [p0]
+ 7812 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7816 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7820 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7824 0x02 0xdc 0x36 0x98 LDA r1, [p2], #-12
+ 7828 0x00 0x00 NOPX
+ 7830 0x00 0x00 NOPX
+ 7832 0x00 0x00 NOPX
+ 7834 0x00 0x00 NOPX
+ 7836 0x00 0x00 NOPX
+ 7838 0x00 0x00 NOPX
+ 7840 0x10 0x63 0x0b 0x98 GEU r17, r1, r16
+ 7844 0x88 0x0f 0x78 0x40 0x01 0x84 JNZ r17, #7920
+.delay_slot
+ 7850 0x1b 0x1e 0xc0 0xf8 MOV r12, p7
+.delay_slot
+ 7854 0x0f 0xd9 0x1d 0x98 ST p2, [sp, #-40]
+.delay_slot
+.swstall delay_slot
+ 7858 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7860 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7862 0x00 0x00 NOPX
+.no_stack_arguments
+ 7864 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912
+.delay_slot
+ 7870 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36]
+.delay_slot
+.swstall delay_slot
+ 7874 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7876 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7878 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7880 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.return_address
+ 7888 0x00 0x0f 0x98 0x00 0x00 0x84 J #7984
+.delay_slot
+ 7894 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168
+.delay_slot
+.swstall delay_slot
+ 7900 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7902 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7904 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7906 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.no_stack_arguments
+ 7920 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912
+.delay_slot
+.swstall delay_slot
+ 7926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7932 0x00 0x01 0x67 0x98 NOPA
+.delay_slot
+ 7936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0x18 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV
+.return_address
+.no_stack_arguments
+ 7952 0x00 0x18 0x40 0x00 0x01 0x04 JL #12416
+.delay_slot
+ 7958 0x18 0x50 0x20 0xf8 MOV r1, r0
+.delay_slot
+ 7962 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168
+.delay_slot
+ 7968 0x4f 0x00 0x01 0x20 0x00 0x44 MOVXM r2, #1325400064
+.delay_slot
+ 7974 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36]
+.delay_slot
+.swstall delay_slot
+ 7978 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.return_address
+ 7984 0xe0 0xc0 0x50 0x02 0xd2 0x00 0x00 0x08 0xb8 0xba LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0
+ 7994 0xfb 0x40 0x80 0x01 0x80 0x08 0x00 0x49 0x78 0xba MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0
+ 8004 0xfb 0x23 0x20 0x00 0x01 0xf1 0xb2 0x6a 0x10 0xba LDA p2, [sp, #-40]; MOVXM p3, #509140
+ 8014 0x00 0x07 0xc2 0xc9 0xb0 0x44 MOVXM p1, #509144
+ 8020 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 0x06 0x1e 0x17 0x18 ST.s16 r16, [p6], #2
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 0x14 0x3a 0x80 0x18 MOVX crRnd, r16
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0
+ 8046 0x00 0x00 NOPX
+ 8048 0x00 0x00 NOPX
+ 8050 0x06 0x0b 0x07 0x18 ST.s8 r24, [p6], m0
+ 8054 0x00 0x00 NOPX
+ 8056 0x00 0x00 NOPX
+ 8058 0x00 0x00 NOPX
+ 8060 0x00 0x00 NOPX
+ 8062 0x00 0x00 NOPX
+ 8064 0x00 0x00 NOPX
+ 8066 0x0e 0x1d 0xd1 0x98 ST r14, [p6], #4
+ 8070 0x0e 0x05 0xf1 0x98 ST r15, [p6]
+ 8074 0x0e 0x15 0xb1 0x98 ST r13, [p6, #4]
+ 8078 0x02 0x1c 0x2e 0x98 LDA el0, [p2], #4
+ 8082 0x00 0x00 NOPX
+ 8084 0x00 0x00 NOPX
+ 8086 0x00 0x00 NOPX
+ 8088 0x00 0x00 NOPX
+ 8090 0x00 0x00 NOPX
+ 8092 0x00 0x00 NOPX
+ 8094 0x0b 0x04 0x29 0x98 ST el0, [p3]
+ 8098 0x02 0x04 0x2e 0x98 LDA el0, [p2]
+ 8102 0x00 0x00 NOPX
+ 8104 0x00 0x00 NOPX
+ 8106 0x00 0x00 NOPX
+ 8108 0x00 0x00 NOPX
+ 8110 0x00 0x00 NOPX
+ 8112 0x00 0x00 NOPX
+ 8114 0x09 0x04 0x29 0x98 ST el0, [p1]
+ 8118 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4]
+ 8122 0x00 0x00 NOPX
+ 8124 0x00 0x0f 0xf8 0x00 0x00 0x84 J #8176
+.delay_slot
+ 8130 0x00 0x07 0xc0 0xc9 0xb8 0x44 MOVXM p0, #509148
+.delay_slot
+.swstall delay_slot
+ 8136 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8138 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8140 0x00 0x01 0x67 0x98 NOPA
+.delay_slot
+ 8144 0x00 0x2c 0xf0 0x00 0x20 0x04 0x29 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+ 8160 0xfb 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x68 0x11 0x3a ST p2, [sp, #-36]; MOVXM p7, #509136
+ 8170 0x00 0x2c 0xf6 0x29 0x81 0xd4 NOPA; MOV r12, p2
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+ 8176 0xe0 0xc2 0xd0 0x44 0x0a 0x2c LDA r16, [p7]; MOVX r17, #1
+ 8182 0x00 0x00 NOPX
+ 8184 0x00 0x00 NOPX
+ 8186 0x00 0x00 NOPX
+ 8188 0x00 0x00 NOPX
+ 8190 0x00 0x00 NOPX
+ 8192 0x00 0x00 NOPX
+ 8194 0x14 0x63 0x08 0x98 NE r17, r17, r16
+ 8198 0x88 0x10 0x58 0x40 0x01 0x84 JNZ r17, #8368
+.delay_slot
+ 8204 0x1e 0x66 0x06 0x18 ADD.NC p6, r12, #12
+.delay_slot
+.swstall delay_slot
+ 8208 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8210 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8212 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8214 0x00 0x00 NOPX
+ 8216 0x00 0x07 0xc4 0xc9 0x88 0x44 MOVXM p2, #509124
+ 8222 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024
+ 8232 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8236 0x00 0x00 NOPX
+ 8238 0x00 0x00 NOPX
+.no_stack_arguments
+ 8240 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+ 8246 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 8250 0x00 0x00 NOPX
+.delay_slot
+ 8252 0x14 0x36 0xda 0x98 LT r27, r16, r13
+.delay_slot
+ 8256 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 8262 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 8272 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8278 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8282 0x80 0x10 0x50 0x40 0x01 0x84 JNZ r16, #8352
+.delay_slot
+.swstall delay_slot
+ 8288 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8292 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8294 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8296 0x00 0x00 NOPX
+ 8298 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6
+ 8304 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 8308 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 8312 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 0x02 0x46 0x36 0x98 LDA r17, [p2, #16]
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 8338 0x00 0x00 NOPX
+ 8340 0x00 0x00 NOPX
+ 8342 0x00 0x00 NOPX
+ 8344 0x00 0x00 NOPX
+ 8346 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 0x00 0x00 NOPX
+ 8354 0x00 0x00 NOPX
+ 8356 0x00 0x00 NOPX
+ 8358 0xe0 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p7]; NOPB; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+ 8368 0x10 0x1c 0x09 0x18 MOVX r14, #2
+ 8372 0x00 0x00 NOPX
+ 8374 0x00 0x00 NOPX
+ 8376 0x00 0x00 NOPX
+ 8378 0x00 0x00 NOPX
+ 8380 0x00 0x00 NOPX
+ 8382 0x13 0xa1 0x08 0x98 NE r16, r14, r16
+ 8386 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544
+.delay_slot
+.swstall delay_slot
+ 8392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8394 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8396 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8398 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8400 0x00 0x00 NOPX
+ 8402 0x00 0x07 0xc4 0xc9 0xc0 0x44 MOVXM p2, #509152
+ 8408 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024
+ 8418 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8422 0x00 0x00 NOPX
+ 8424 0x00 0x00 NOPX
+.no_stack_arguments
+ 8426 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+ 8432 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 8436 0x00 0x00 NOPX
+.delay_slot
+ 8438 0x14 0x36 0xda 0x98 LT r27, r16, r13
+.delay_slot
+ 8442 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 8448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV
+.return_address
+ 8464 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8470 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8474 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544
+.delay_slot
+.swstall delay_slot
+ 8480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8482 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8484 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8486 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8488 0x00 0x00 NOPX
+ 8490 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6
+ 8496 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 8500 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 8504 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 0x02 0x46 0x36 0x98 LDA r17, [p2, #16]
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 8530 0x00 0x00 NOPX
+ 8532 0x00 0x00 NOPX
+ 8534 0x00 0x00 NOPX
+ 8536 0x00 0x00 NOPX
+ 8538 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 0x00 0x00 NOPX
+ 8546 0x00 0x00 NOPX
+ 8548 0x00 0x00 NOPX
+ 8550 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x72 0x10 0xba LDA r16, [p7]; MOVXM p7, #509156
+ 8560 0x00 0x00 NOPX
+ 8562 0x00 0x00 NOPX
+ 8564 0x00 0x00 NOPX
+ 8566 0x00 0x00 NOPX
+ 8568 0x00 0x00 NOPX
+ 8570 0x10 0x24 0x11 0x18 MOVX r18, #4
+ 8574 0x14 0xa1 0x08 0x98 NE r16, r18, r16
+ 8578 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768
+.delay_slot
+ 8584 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+.swstall delay_slot
+ 8590 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8592 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8594 0x00 0x00 NOPX
+.delay_slot
+ 8596 0x10 0x22 0x01 0x18 MOVX r17, #0
+ 8600 0xe0 0xc2 0xd0 0x34 0x02 0x2c LDA r16, [p7]; MOVX r13, #0
+ 8606 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8610 0x00 0x00 NOPX
+ 8612 0x00 0x00 NOPX
+.no_stack_arguments
+ 8614 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 8620 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8622 0x00 0x00 NOPX
+.delay_slot
+ 8624 0x14 0x37 0x1a 0x98 LT r27, r16, r17
+.delay_slot
+ 8628 0x8c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r17, r16; MOV r15, r27
+.delay_slot
+ 8634 0x00 0x2c 0xf8 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 8640 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8646 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8650 0x80 0x11 0x10 0x40 0x01 0x84 JNZ r16, #8736
+.delay_slot
+.swstall delay_slot
+ 8656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8658 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8660 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8662 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8664 0x00 0x00 NOPX
+ 8666 0xdf 0xee 0xd0 0x3f 0x17 0xea 0x08 0x01 0x58 0xba LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1
+ 8676 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4
+ 8680 0x06 0xfe 0x76 0x98 LDA r19, [p6], #-4
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 0x06 0x46 0x56 0x98 LDA r18, [p6, #16]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 0x0e 0x06 0x51 0x98 ST r18, [p6]
+ 8706 0x00 0x00 NOPX
+ 8708 0x00 0x00 NOPX
+ 8710 0x00 0x11 0x28 0x00 0x00 0x84 J #8784
+.delay_slot
+.swstall delay_slot
+ 8716 0x00 0x00 NOPX
+.delay_slot
+ 8718 0x14 0x93 0x18 0x18 ACQ r18, r17
+.delay_slot
+.swstall delay_slot
+ 8722 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8724 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8726 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 0x00 0x11 0x28 0x00 0x00 0x84 J #8784
+.delay_slot
+ 8742 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+.swstall delay_slot
+ 8746 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8748 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8750 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+ 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x28 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+ 8784 0xfc 0x73 0x20 0x00 0x01 0xf3 0x32 0x66 0x10 0xba LDA p7, [sp, #-32]; MOVXM p6, #509132
+ 8794 0xc0 0xd6 0xd0 0x00 0x01 0xf1 0x32 0x68 0x10 0xba LDA r21, [p6]; MOVXM p2, #509136
+ 8804 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p6, #509120
+ 8814 0x06 0x06 0x96 0x98 LDA r20, [p6]
+ 8818 0x00 0x00 NOPX
+ 8820 0x00 0x00 NOPX
+ 8822 0x00 0x00 NOPX
+ 8824 0x07 0x06 0x76 0x98 LDA r19, [p7]
+ 8828 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16
+ 8832 0x14 0x61 0x07 0x98 EQ r16, r17, r16
+ 8836 0x80 0x12 0x08 0x40 0x01 0x84 JNZ r16, #9232
+.delay_slot
+ 8842 0x15 0x28 0x07 0x18 ADD r20, r20, #1
+.delay_slot
+ 8846 0x0e 0x06 0x91 0x98 ST r20, [p6]
+.delay_slot
+.swstall delay_slot
+ 8850 0x00 0x00 NOPX
+.delay_slot
+ 8852 0x18 0x69 0xd5 0x58 ADD.NC p0, r19, r21
+.delay_slot
+ 8856 0xf7 0x83 0xb0 0x48 0x22 0x5c ST p0, [sp, #-68]; MOVX r18, #4
+ 8862 0x14 0x61 0x27 0x98 EQ r16, r17, r18
+ 8866 0x80 0x11 0xc0 0x40 0x01 0x84 JNZ r16, #9088
+.delay_slot
+.swstall delay_slot
+ 8872 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8874 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8876 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8878 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8880 0x00 0x00 NOPX
+ 8882 0x14 0x60 0xe8 0x98 NE r16, r17, r14
+ 8886 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040
+.delay_slot
+ 8892 0x00 0x07 0xcc 0xc9 0xc0 0x44 MOVXM p6, #509152
+.delay_slot
+.swstall delay_slot
+ 8898 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8900 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8902 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8904 0x00 0x00 NOPX
+ 8906 0xc0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r18, [p6]; MOVXM p6, #509000
+ 8916 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6a 0x10 0xba LDA r16, [p6]; MOVXM p6, #509140
+ 8926 0xc0 0xc6 0xd0 0x60 0x02 0x2c LDA r17, [p6]; MOVX r24, #0
+ 8932 0x00 0x00 NOPX
+ 8934 0x00 0x00 NOPX
+ 8936 0x00 0x00 NOPX
+ 8938 0x00 0x00 NOPX
+ 8940 0x00 0x00 NOPX
+ 8942 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16
+ 8946 0x00 0x00 NOPX
+ 8948 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16
+ 8954 0x15 0x37 0x1c 0x98 LTU r27, r20, r17
+ 8958 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27
+ 8962 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 8966 0x16 0x23 0x32 0x18 SEL.EQZ r17, r24, r19, r27
+ 8970 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 8974 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 8978 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344
+.delay_slot
+ 8984 0x00 0x07 0xcc 0xca 0x20 0x44 MOVXM p6, #509200
+.delay_slot
+ 8990 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 8994 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8996 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8998 0x00 0x00 NOPX
+ 9000 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200
+.delay_slot
+ 9006 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136
+.delay_slot
+ 9016 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992
+.delay_slot
+ 9026 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9030 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9032 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200
+.delay_slot
+ 9046 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136
+.delay_slot
+ 9056 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992
+.delay_slot
+ 9066 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+ 9088 0x00 0x0d 0x00 0x00 0x01 0xf3 0x32 0x72 0x10 0xba MOVA r13, #0; MOVXM p6, #509156
+ 9098 0xc0 0xca 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r18, [p6]; MOVXM p2, #508992
+ 9108 0x40 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6c 0x10 0xba LDA r16, [p2]; MOVXM p6, #509144
+ 9118 0xc0 0xc6 0xd0 0x3c 0x0a 0x2c LDA r17, [p6]; MOVX r15, #1
+ 9124 0x00 0x00 NOPX
+ 9126 0x00 0x00 NOPX
+ 9128 0x00 0x00 NOPX
+ 9130 0x00 0x00 NOPX
+ 9132 0x00 0x00 NOPX
+ 9134 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16
+ 9138 0x00 0x00 NOPX
+ 9140 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16
+ 9146 0x15 0x37 0x1c 0x98 LTU r27, r20, r17
+ 9150 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27
+ 9154 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 9158 0x13 0x63 0x32 0x18 SEL.EQZ r17, r13, r19, r27
+ 9162 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 9166 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 9170 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344
+.delay_slot
+ 9176 0x00 0x07 0xcc 0xca 0x30 0x44 MOVXM p6, #509208
+.delay_slot
+ 9182 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 9186 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9188 0x00 0x00 NOPX
+.delay_slot
+ 9190 0x00 0x2c 0xf0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba NOPA; MOVXM p7, #509136
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+ 9200 0xd1 0x81 0x60 0x00 0x04 0x98 0x00 0x00 0x21 0x3a MOVS p6, r12; J #9408
+.delay_slot
+ 9210 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.delay_slot
+.swstall delay_slot
+ 9214 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9216 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9218 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9220 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+ 9232 0x00 0x07 0xcc 0xc9 0x88 0x44 MOVXM p6, #509124
+ 9238 0xc0 0xce 0xd0 0x00 0x01 0xf3 0x32 0x22 0x10 0xba LDA r19, [p6]; MOVXM p6, #508996
+ 9248 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6e 0x10 0xba LDA r16, [p6]; MOVXM p6, #509148
+ 9258 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 9262 0x00 0x00 NOPX
+ 9264 0x00 0x00 NOPX
+ 9266 0x00 0x00 NOPX
+ 9268 0x00 0x00 NOPX
+ 9270 0x00 0x00 NOPX
+ 9272 0x14 0xe7 0x0f 0x98 MUL r19, r19, r16
+ 9276 0x00 0x00 NOPX
+ 9278 0x95 0x26 0x3a 0xb3 0x82 0xa4 SUB r20, r18, r19; ADD.NC r21, r19, r16
+ 9284 0x15 0x77 0x2c 0x98 LTU r27, r21, r18
+ 9288 0x15 0x29 0x02 0x18 SEL.EQZ r20, r20, r16, r27
+ 9292 0x9e 0xe5 0x98 0xa0 0x01 0x64 LTU r27, r19, r18; MOV r17, #0
+ 9298 0x14 0x63 0x42 0x18 SEL.EQZ r17, r17, r20, r27
+ 9302 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 9306 0x14 0x61 0x07 0x98 EQ r16, r17, r16
+ 9310 0x80 0x13 0xe0 0x40 0x01 0x84 JNZ r16, #10176
+.delay_slot
+ 9316 0x00 0x07 0xcc 0xca 0x40 0x44 MOVXM p6, #509216
+.delay_slot
+ 9322 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 9326 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9328 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 0xf7 0x83 0x26 0x8c 0x0b 0x00 0xe0 0x49 0xe8 0x01 0x58 0x76 LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 0x07 0xbc 0x99 0x18 LDA p1, [sp, #-68]
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 0x00 0x08 0x28 0x00 0x01 0x04 JL #4176
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+ 9374 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9378 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9380 0x00 0x00 NOPX
+.delay_slot
+ 9382 0x00 0x2c 0xf0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba NOPA; MOVXM p2, #509184
+.return_address
+ 9392 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x20 0x11 0x3a MOVS p0, p7; MOVXM p2, #508992
+ 9402 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 9408 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20
+.no_stack_arguments
+ 9412 0x00 0x09 0x78 0x00 0x01 0x04 JL #4848
+.delay_slot
+.swstall delay_slot
+ 9418 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9420 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9422 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.return_address
+ 9440 0x07 0x06 0x16 0x98 LDA r16, [p7]
+ 9444 0x00 0x00 NOPX
+ 9446 0x00 0x00 NOPX
+ 9448 0x00 0x00 NOPX
+ 9450 0x00 0x00 NOPX
+ 9452 0x00 0x00 NOPX
+ 9454 0x00 0x00 NOPX
+ 9456 0x13 0xe3 0x08 0x98 NE r17, r15, r16
+ 9460 0x88 0x12 0xe0 0x40 0x01 0x84 JNZ r17, #9664
+.delay_slot
+.swstall delay_slot
+ 9466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9474 0x00 0x00 NOPX
+ 9476 0x00 0x07 0xce 0xc9 0x88 0x44 MOVXM p7, #509124
+ 9482 0xe0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p2, #509024
+ 9492 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 9496 0x00 0x00 NOPX
+ 9498 0x00 0x00 NOPX
+ 9500 0x00 0x00 NOPX
+.no_stack_arguments
+ 9502 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9508 0x00 0x00 NOPX
+.delay_slot
+ 9510 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9514 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13
+.delay_slot
+ 9520 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 9526 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 9536 0xfb 0xa3 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15
+ 9546 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9550 0x80 0x12 0xd0 0x40 0x01 0x84 JNZ r16, #9632
+.delay_slot
+ 9556 0x10 0x1e 0x05 0x18 MOVX r15, #1
+.delay_slot
+.swstall delay_slot
+ 9560 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9562 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9564 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9566 0x00 0x00 NOPX
+ 9568 0x4a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p2, #20]; ST r13, [p7]
+ 9574 0x00 0x00 NOPX
+ 9576 0x00 0x00 NOPX
+ 9578 0x00 0x00 NOPX
+ 9580 0x00 0x00 NOPX
+ 9582 0x00 0x00 NOPX
+ 9584 0x00 0x00 NOPX
+ 9586 0x14 0x10 0xf8 0x18 REL r16, r15
+ 9590 0xdc 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba LDA r16, [p6, #-8]; MOVXM p7, #509136
+ 9600 0x00 0x00 NOPX
+ 9602 0x00 0x00 NOPX
+ 9604 0x00 0x12 0xd8 0x00 0x00 0x84 J #9648
+.delay_slot
+.swstall delay_slot
+ 9610 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9614 0x00 0x00 NOPX
+.delay_slot
+ 9616 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+.delay_slot
+ 9620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x73 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p6, #-8]; NOPX
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+ 9632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf3 0xb2 0x68 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+ 9648 0xe0 0xc2 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+ 9664 0x10 0x22 0x01 0x18 MOVX r17, #0
+ 9668 0x00 0x00 NOPX
+ 9670 0x00 0x00 NOPX
+ 9672 0x00 0x00 NOPX
+ 9674 0x00 0x00 NOPX
+ 9676 0x00 0x00 NOPX
+ 9678 0x13 0xa1 0x08 0x98 NE r16, r14, r16
+ 9682 0x80 0x13 0x48 0x40 0x01 0x84 JNZ r16, #9872
+.delay_slot
+ 9688 0x00 0x07 0xce 0xc9 0xc0 0x44 MOVXM p7, #509152
+.delay_slot
+ 9694 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+.swstall delay_slot
+ 9700 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9702 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9704 0x00 0x00 NOPX
+ 9706 0x07 0x06 0x16 0x98 LDA r16, [p7]
+ 9710 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 9714 0x00 0x00 NOPX
+ 9716 0x00 0x00 NOPX
+ 9718 0x00 0x00 NOPX
+.no_stack_arguments
+ 9720 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9726 0x00 0x00 NOPX
+.delay_slot
+ 9728 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9732 0xe0 0xc2 0x38 0x6e 0x35 0x5c ST r16, [p7]; LT r27, r16, r17
+.delay_slot
+ 9738 0x8c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r17, r16; MOV r14, r27
+.delay_slot
+ 9744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV
+.return_address
+ 9760 0xfb 0x93 0x20 0x1b 0x01 0x8f 0x6b 0x90 0x78 0xba LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14
+ 9770 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9774 0x80 0x13 0x38 0x40 0x01 0x84 JNZ r16, #9840
+.delay_slot
+ 9780 0x00 0x07 0xc4 0xc9 0xa0 0x44 MOVXM p2, #509136
+.delay_slot
+.swstall delay_slot
+ 9786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9788 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9790 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9792 0x00 0x00 NOPX
+ 9794 0x2a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p1, #20]; ST r13, [p7]
+ 9800 0x00 0x00 NOPX
+ 9802 0x00 0x00 NOPX
+ 9804 0x00 0x00 NOPX
+ 9806 0x00 0x00 NOPX
+ 9808 0x00 0x00 NOPX
+ 9810 0x00 0x00 NOPX
+ 9812 0x14 0x10 0xf8 0x18 REL r16, r15
+ 9816 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8]
+ 9820 0x00 0x00 NOPX
+ 9822 0x00 0x00 NOPX
+ 9824 0x00 0x00 NOPX
+ 9826 0x00 0x00 NOPX
+ 9828 0x00 0x00 NOPX
+ 9830 0x00 0x00 NOPX
+ 9832 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 9836 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8]
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 0x00 0x13 0x50 0x00 0x00 0x84 J #9888
+.delay_slot
+ 9846 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.delay_slot
+.swstall delay_slot
+ 9850 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9852 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9854 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9856 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+ 9872 0xfb 0xf3 0x20 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0x32 0x68 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+ 9888 0x40 0xc2 0xd0 0x44 0x22 0x2c LDA r16, [p2]; MOVX r17, #4
+ 9894 0x00 0x00 NOPX
+ 9896 0x00 0x00 NOPX
+ 9898 0x00 0x00 NOPX
+ 9900 0x00 0x00 NOPX
+ 9902 0x00 0x00 NOPX
+ 9904 0x00 0x00 NOPX
+ 9906 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 9910 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064
+.delay_slot
+ 9916 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156
+.delay_slot
+.swstall delay_slot
+ 9922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9928 0x00 0x00 NOPX
+ 9930 0x40 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2]; MOVXM p1, #509024
+ 9940 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1]
+ 9944 0x00 0x00 NOPX
+ 9946 0x00 0x00 NOPX
+ 9948 0x00 0x00 NOPX
+.no_stack_arguments
+ 9950 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9956 0x00 0x00 NOPX
+.delay_slot
+ 9958 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9962 0x40 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p2]; LT r27, r16, r13
+.delay_slot
+ 9968 0x6c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r13, r16; MOV r14, r27
+.delay_slot
+ 9974 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 9984 0x6c 0x06 0x3d 0xae 0x41 0xe4 SUB r16, r13, r3; MOV r27, r14
+ 9990 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9994 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064
+.delay_slot
+ 10000 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156
+.delay_slot
+.swstall delay_slot
+ 10006 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10008 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10012 0x00 0x00 NOPX
+ 10014 0xea 0xc2 0xd4 0x0b 0x63 0x0c LDA r16, [p7, #20]; ST r13, [p2]
+ 10020 0x00 0x00 NOPX
+ 10022 0x00 0x00 NOPX
+ 10024 0x00 0x00 NOPX
+ 10026 0x00 0x00 NOPX
+ 10028 0x00 0x00 NOPX
+ 10030 0x00 0x00 NOPX
+ 10032 0x14 0x10 0xf8 0x18 REL r16, r15
+ 10036 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8]
+ 10040 0x00 0x00 NOPX
+ 10042 0x00 0x00 NOPX
+ 10044 0x00 0x00 NOPX
+ 10046 0x00 0x00 NOPX
+ 10048 0x00 0x00 NOPX
+ 10050 0x00 0x00 NOPX
+ 10052 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 10056 0xdc 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p6, #-8]; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+ 10064 0x00 0x07 0xcc 0xc9 0x80 0x44 MOVXM p6, #509120
+ 10070 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x74 0x10 0xba LDA r16, [p6]; MOVXM p2, #509160
+ 10080 0x02 0x06 0x36 0x98 LDA r17, [p2]
+ 10084 0x00 0x00 NOPX
+ 10086 0x00 0x00 NOPX
+ 10088 0x00 0x00 NOPX
+ 10090 0x00 0x00 NOPX
+ 10092 0x00 0x00 NOPX
+ 10094 0x00 0x00 NOPX
+ 10096 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 10100 0x80 0x13 0xc8 0x40 0x01 0x84 JNZ r16, #10128
+.delay_slot
+ 10106 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20]
+.delay_slot
+ 10110 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16]
+.delay_slot
+ 10114 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12]
+.delay_slot
+.swstall delay_slot
+ 10118 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10120 0x00 0x00 NOPX
+ 10122 0x00 0x2c 0xfc 0x0b 0x63 0x0c NOPA; ST r13, [p6]
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 0xff 0x2e 0x2e 0xeb 0x41 0xd4 LDA r11, [sp, #-8]; MOV lr, r11
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 0x07 0xfd 0x91 0x18 LDA r12, [sp, #-4]
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 0x07 0xe9 0xb1 0x18 LDA r13, [sp, #-24]
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 0x1e 0x66 0x20 0xf8 MOV p6, r12
+.delay_slot
+ 10150 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 10156 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10158 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+ 10176 0x00 0x0d 0x06 0x8c 0x0b 0x00 0x04 0x98 0x00 0x00 0x20 0x76 MOVA r13, #0; MOVS p6, r12; J #9408
+.delay_slot
+ 10188 0x03 0xc0 0xa7 0x20 0x09 0x64 MOVX r15, #1; MOV r14, #2
+.delay_slot
+ 10194 0x00 0x07 0xc4 0xc8 0x80 0x44 MOVXM p2, #508992
+.delay_slot
+ 10200 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.delay_slot
+ 10206 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.delay_slot
+.swstall delay_slot
+ 10210 0x00 0x00 NOPX
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 10224
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function_start
+ 10224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0
+ 10230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10342 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10362 0x18 0x9f 0xa0 0xf8 MOV r2, r31
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+
+.text_segment PM 10368
+.label _ZL19propagateFloat32NaNjj
+.function_start
+ 10368 0xfd 0x43 0x00 0x3f 0xc0 0x02 0x48 0x00 0x10 0xba MOVA r3, #-22; MOVXM r18, #-16777216
+ 10378 0x3f 0xe7 0x00 0x00 0x10 0x00 0x08 0x00 0x10 0xba MOVA r7, #511; MOVXM r0, #4194304
+ 10388 0x00 0x30 0x00 0x02 0x40 0x2c 0xa9 0xfe 0x58 0xba MOVA r16, #1; OR r4, r1, r0; MOV r5, #510
+ 10398 0x10 0x80 0x05 0x98 OR r0, r2, r0
+ 10402 0x10 0x4c 0x3d 0x98 LSHL r6, r1, r3
+ 10406 0x10 0x86 0x3d 0x98 LSHL r3, r2, r3
+ 10410 0x11 0xc6 0x34 0x98 AND r3, r7, r3
+ 10414 0x11 0xcc 0x64 0x98 AND r6, r7, r6
+ 10418 0x11 0x4c 0x67 0x98 EQ r6, r5, r6
+ 10422 0x10 0xa3 0x0d 0x98 LSHL r17, r2, r16
+ 10426 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 10430 0x11 0x22 0x02 0x18 SEL.EQZ r17, r4, r0, r27
+ 10434 0x00 0x3f 0xf8 0x3f 0xfe 0x44 MOVXM r16, #4194303
+ 10440 0x10 0x85 0x04 0x98 AND r2, r2, r16
+ 10444 0x10 0x84 0xf0 0x18 NEZ r2, r2
+ 10448 0x10 0x43 0x04 0x98 AND r1, r1, r16
+ 10452 0x10 0x42 0xf0 0x18 NEZ r1, r1
+ 10456 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10460 0x10 0x76 0x64 0x98 AND r27, r1, r6
+.delay_slot
+ 10464 0x10 0xc2 0x57 0x98 EQ r1, r3, r5
+.delay_slot
+ 10468 0x14 0x46 0x42 0x18 SEL.EQZ r3, r17, r4, r27
+.delay_slot
+ 10472 0x10 0x76 0x24 0x98 AND r27, r1, r2
+.delay_slot
+ 10476 0x10 0xc0 0x02 0x18 SEL.EQZ r0, r3, r0, r27
+.label _ZL19propagateFloat32NaNjj__end
+.label _ZL19roundAndPackFloat32iij
+.function_start
+ 10480 0x08 0x00 0x00 0x00 0x01 0xf0 0x32 0x7a 0x10 0xba MOVA r0, #64; MOVXM p0, #509172
+ 10490 0x00 0x92 0xd0 0x99 0xfa 0x2c LDA r4, [p0]; MOVX r6, #127
+.swstall __RAW__R_1948
+ 10496 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10498 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10500 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10502 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10504 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10506 0x00 0x00 NOPX
+ 10508 0x20 0x14 0xa8 0x00 0x01 0x84 JZ r4, #10576
+.delay_slot
+ 10514 0x10 0x4a 0x01 0x18 MOVX r5, #64
+.delay_slot
+.swstall delay_slot
+ 10518 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10520 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10524 0x00 0x00 NOPX
+ 10526 0x00 0x70 0x00 0x00 0x70 0x4b 0x08 0x00 0x58 0xba MOVA r16, #3; MOVX r7, #2; MOV r24, #0
+ 10536 0x3e 0xc8 0xf2 0xa0 0x05 0x64 EQ r27, r7, r4; MOV r5, #1
+ 10542 0x11 0x8f 0x82 0x18 SEL.EQZ r7, r6, r24, r27
+ 10546 0x11 0x37 0x07 0x98 EQ r27, r4, r16
+ 10550 0x34 0x30 0x4d 0xa1 0x41 0xe4 SEL.EQZ r16, r6, r24, r27; MOV r27, r1
+ 10556 0x14 0x0e 0x72 0x18 SEL.EQZ r7, r16, r7, r27
+ 10560 0x11 0x76 0x47 0x98 EQ r27, r5, r4
+ 10564 0x00 0x2c 0xf0 0x00 0x20 0x0e 0x5c 0x10 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+ 10576 0x14 0x96 0x08 0x23 0xf5 0x64 EXTEND.u16 r18, r2; MOV r16, #253
+ 10582 0x14 0xa5 0x0a 0x98 LT r18, r18, r16
+ 10586 0x90 0x15 0x08 0x40 0x01 0x84 JNZ r18, #10768
+.delay_slot
+ 10592 0x10 0xe2 0x64 0x98 AND r17, r3, r6
+.delay_slot
+ 10596 0x10 0x0e 0x7d 0x18 MOVX r7, #31
+.delay_slot
+ 10600 0x10 0x42 0x7d 0x98 LSHL r1, r1, r7
+.delay_slot
+.swstall delay_slot
+ 10604 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10606 0x00 0x00 NOPX
+ 10608 0x00 0x12 0x00 0x05 0x38 0x3e 0x88 0xca 0xa8 0xba MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5
+ 10618 0x15 0x29 0x2a 0x98 LT r20, r20, r18
+ 10622 0x14 0x20 0x2a 0x98 LT r16, r16, r2
+ 10626 0x14 0xe7 0x44 0x98 AND r19, r19, r20
+ 10630 0x14 0xe7 0x05 0x98 OR r19, r19, r16
+ 10634 0x98 0x15 0x30 0x40 0x01 0x84 JNZ r19, #10848
+.delay_slot
+ 10640 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+.swstall delay_slot
+ 10644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10646 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10648 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10650 0x00 0x00 NOPX
+ 10652 0x10 0xa7 0x09 0x98 GE r19, r2, r16
+ 10656 0x98 0x15 0x10 0x40 0x01 0x84 JNZ r19, #10784
+.delay_slot
+.swstall delay_slot
+ 10662 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10664 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10666 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10670 0x00 0x00 NOPX
+ 10672 0x14 0x04 0x21 0x98 SUB r2, r16, r2
+ 10676 0x10 0x14 0xf8 0x00 0x01 0x84 JZ r2, #10736
+.delay_slot
+.swstall delay_slot
+ 10682 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10684 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10686 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10688 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10690 0x00 0x00 NOPX
+ 10692 0x84 0x44 0x39 0xa0 0x81 0x64 SUB r17, r16, r2; MOV r19, #32
+ 10698 0x11 0xcf 0x14 0x98 AND r7, r7, r17
+ 10702 0x10 0xce 0x7d 0x98 LSHL r7, r3, r7
+ 10706 0x10 0xe3 0x1d 0x98 LSHL r17, r3, r17
+ 10710 0x10 0xb7 0x3a 0x98 LT r27, r2, r19
+ 10714 0x11 0xce 0xf0 0x18 NEZ r7, r7
+ 10718 0x10 0xc6 0xf0 0x18 NEZ r3, r3
+ 10722 0x11 0xc5 0x15 0x98 OR r2, r7, r17
+ 10726 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0xc6 0x22 0x7a NOPA; NOPS; SEL.EQZ r3, r3, r2, r27
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 0x00 0x15 0x10 0x00 0x00 0x84 J #10784
+.delay_slot
+ 10742 0x10 0xe2 0x64 0x98 AND r17, r3, r6
+.delay_slot
+ 10746 0x10 0x04 0x01 0x18 MOVX r2, #0
+.delay_slot
+.swstall delay_slot
+ 10750 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10754 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+ 10768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+ 10784 0xff 0x20 0x00 0x22 0x30 0x34 0xa8 0xca 0xa8 0xba MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5
+ 10794 0x02 0xe3 0x00 0x06 0x62 0x2c 0x8f 0xff 0x58 0xba MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1
+ 10804 0x11 0x8c 0xd0 0x18 EQZ r6, r6
+ 10808 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0
+ 10812 0x11 0x88 0x46 0x98 XOR r4, r6, r4
+ 10816 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10820 0x11 0x36 0x04 0x98 AND r27, r4, r0
+.delay_slot
+ 10824 0x14 0x04 0x22 0x18 SEL.EQZ r2, r16, r2, r27
+.delay_slot
+ 10828 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3
+.delay_slot
+ 10832 0x10 0x44 0x20 0x98 ADD r2, r1, r2
+.delay_slot
+ 10836 0x00 0x2c 0xf0 0x00 0x20 0x36 0x01 0x04 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; ADD r0, r27, r2
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+ 10848 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10852 0x7f 0x80 0x01 0x20 0x00 0x44 MOVXM r2, #2139095040
+.delay_slot
+ 10858 0x10 0x46 0x20 0x98 ADD r3, r1, r2
+.delay_slot
+ 10862 0x11 0x44 0xd0 0x18 EQZ r2, r5
+.delay_slot
+ 10866 0x10 0xc0 0x21 0x98 SUB r0, r3, r2
+.delay_slot
+.swstall delay_slot
+ 10870 0x00 0x00 NOPX
+.label _ZL19roundAndPackFloat32iij__end
+
+.text_segment PM 10880
+.label _ZL28normalizeRoundAndPackFloat32iij
+.tail_call
+.function_start
+ 10880 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+ 10886 0x10 0xe0 0x30 0x18 CLZ r16, r3
+.delay_slot
+ 10890 0x14 0x21 0xff 0x18 ADD r16, r16, #-1
+.delay_slot
+ 10894 0x10 0x85 0x01 0x98 SUB r2, r2, r16
+.delay_slot
+ 10898 0x10 0xc7 0x0d 0x98 LSHL r3, r3, r16
+.delay_slot
+.swstall delay_slot
+ 10902 0x00 0x00 NOPX
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+
+.text_segment PM 10912
+.label int32_to_float32
+.function_start
+ 10912 0x08 0x15 0x78 0x00 0x01 0x84 JZ r1, #10992
+.delay_slot
+.swstall delay_slot
+ 10918 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10920 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10926 0x00 0x00 NOPX
+ 10928 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648
+ 10934 0x10 0x61 0x07 0x98 EQ r16, r1, r16
+ 10938 0x80 0x15 0x80 0x40 0x01 0x84 JNZ r16, #11008
+.delay_slot
+.swstall delay_slot
+ 10944 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10946 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10948 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10950 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10952 0x00 0x00 NOPX
+.tail_call
+ 10954 0x13 0x82 0x00 0x00 0x05 0x50 0x00 0x00 0x20 0xba MOVA r2, #156; J #10880
+.delay_slot
+ 10964 0x10 0x47 0x10 0x18 ABS r3, r1
+.delay_slot
+ 10968 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 10972 0x10 0x43 0x0a 0x98 LT r1, r1, r16
+.delay_slot
+.swstall delay_slot
+ 10976 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10978 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_Fint32_to_float32_80
+.return_address
+ 10992 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10996 0x10 0x00 0x01 0x18 MOVX r0, #0
+.delay_slot
+.swstall delay_slot
+ 11000 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11002 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11004 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11006 0x00 0x00 NOPX
+.label TGT_Fint32_to_float32_96
+ 11008 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11012 0xcf 0x00 0x00 0x20 0x00 0x44 MOVXM r0, #-822083584
+.delay_slot
+.swstall delay_slot
+ 11018 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11022 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11024 0x00 0x00 NOPX
+.label int32_to_float32__end
+
+.text_segment PM 11040
+.label _ZL14addFloat32Sigsjji
+.function_start
+ 11040 0xfd 0x32 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r18, #-23; MOVXM r16, #8388607
+ 11050 0x10 0x63 0x2d 0x98 LSHL r17, r1, r18
+ 11054 0x10 0x89 0x2d 0x98 LSHL r4, r2, r18
+ 11058 0x14 0x76 0x90 0x18 EXTEND.u8 r27, r17
+ 11062 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4
+ 11066 0xdc 0x72 0x3c 0x20 0x01 0x64 SUB r17, r27, r25; MOV r24, #0
+ 11072 0x16 0x09 0x1a 0x98 LT r4, r24, r17
+ 11076 0x20 0x15 0xf8 0x40 0x01 0x84 JNZ r4, #11248
+.delay_slot
+ 11082 0x10 0x67 0x04 0x98 AND r19, r1, r16
+.delay_slot
+ 11086 0x14 0x20 0x90 0x20 0x19 0x64 AND r16, r2, r16; MOV r0, #6
+.delay_slot
+ 11092 0x14 0xe6 0x0d 0x98 LSHL r19, r19, r0
+.delay_slot
+ 11096 0x84 0x01 0xba 0x23 0xfd 0x64 LSHL r16, r16, r0; MOV r20, #255
+.delay_slot
+ 11102 0xd8 0x28 0xf9 0x20 0x7d 0x64 EQ r0, r27, r20; MOV r18, #31
+ 11108 0x14 0x4b 0x89 0x98 GE r5, r17, r24
+ 11112 0x28 0x16 0x58 0x40 0x01 0x84 JNZ r5, #11440
+.delay_slot
+ 11118 0x10 0xc9 0x2d 0x98 LSHL r4, r3, r18
+.delay_slot
+.swstall delay_slot
+ 11122 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11124 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11126 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11128 0x00 0x00 NOPX
+ 11130 0x16 0x69 0x47 0x98 EQ r20, r25, r20
+ 11134 0xa0 0x16 0x40 0x40 0x01 0x84 JNZ r20, #11392
+.delay_slot
+.swstall delay_slot
+ 11140 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11142 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11144 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11146 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11148 0x00 0x00 NOPX
+ 11150 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11154 0x18 0x9c 0xa0 0xf8 MOV r2, r25
+ 11158 0x14 0x40 0x07 0x18 ADD r0, r17, #1
+ 11162 0x10 0x23 0x12 0x18 SEL.EQZ r17, r0, r17, r27
+ 11166 0x16 0x23 0x11 0x98 SUB r17, r24, r17
+ 11170 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344
+.delay_slot
+ 11176 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912
+.delay_slot
+ 11182 0x14 0xc7 0x45 0x98 OR r3, r19, r20
+.delay_slot
+ 11186 0x14 0xe6 0x32 0x18 SEL.EQZ r19, r19, r3, r27
+.delay_slot
+.swstall delay_slot
+ 11190 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11192 0x00 0x00 NOPX
+ 11194 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32
+ 11200 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11204 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18
+ 11208 0x14 0x76 0x0a 0x98 LT r27, r17, r0
+ 11212 0x00 0x16 0x28 0x00 0x00 0x84 J #11344
+.delay_slot
+ 11218 0x14 0xc6 0x3d 0x98 LSHL r3, r19, r3
+.delay_slot
+ 11222 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+.delay_slot
+ 11226 0x14 0xe2 0xf0 0x18 NEZ r17, r19
+.delay_slot
+ 11230 0x10 0xe5 0x25 0x98 OR r18, r3, r18
+.delay_slot
+ 11234 0x00 0x2c 0xf0 0x00 0x24 0x67 0x22 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM
+.label TGT_F_ZL14addFloat32Sigsjji_208
+ 11248 0x00 0x16 0x78 0x40 0x01 0x84 JNZ r0, #11504
+.delay_slot
+ 11254 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912
+.delay_slot
+.swstall delay_slot
+ 11260 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11262 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11264 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11266 0x00 0x00 NOPX
+ 11268 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11272 0x88 0xff 0xe1 0x3b 0x41 0xe4 ADD r3, r17, #-1; MOV r2, r27
+ 11278 0x1e 0xdc 0xa0 0xf8 MOV r27, r25
+ 11282 0x10 0xe3 0x12 0x18 SEL.EQZ r17, r3, r17, r27
+ 11286 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344
+.delay_slot
+ 11292 0x15 0x01 0x05 0x98 OR r0, r20, r16
+.delay_slot
+ 11296 0x14 0x20 0x02 0x18 SEL.EQZ r16, r16, r0, r27
+.delay_slot
+.swstall delay_slot
+ 11300 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11302 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11304 0x00 0x00 NOPX
+ 11306 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32
+ 11312 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11316 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18
+ 11320 0x14 0x06 0x3d 0x98 LSHL r3, r16, r3
+ 11324 0x14 0x76 0x0a 0x98 LT r27, r17, r0
+ 11328 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+ 11332 0x14 0x20 0xf0 0x18 NEZ r16, r16
+ 11336 0x10 0xe3 0x25 0x98 OR r17, r3, r18
+ 11340 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27
+.label TGT_F_ZL14addFloat32Sigsjji_304
+ 11344 0x00 0x32 0x00 0x27 0x3a 0x2e 0x28 0xbf 0xc8 0xba MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1
+ 11354 0x9c 0xe0 0x18 0x31 0x01 0x24 ADD r19, r19, r16; ADD.NC r16, r17, #1
+ 11360 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18
+ 11364 0x14 0xb7 0x8a 0x98 LT r27, r18, r24
+ 11368 0x14 0x45 0x02 0x18 SEL.EQZ r2, r17, r16, r27
+ 11372 0x14 0x87 0x32 0x18 SEL.EQZ r3, r18, r19, r27
+.label __ll1__ZL14addFloat32Sigsjji
+.tail_call
+ 11376 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+.swstall delay_slot
+ 11382 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11384 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11386 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11390 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.return_address
+ 11392 0x80 0x16 0x88 0x40 0x01 0x84 JNZ r16, #11536
+.delay_slot
+.swstall delay_slot
+ 11398 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11400 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11406 0x00 0x00 NOPX
+ 11408 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11412 0x7f 0x80 0x08 0x20 0x00 0x44 MOVXM r16, #2139095040
+.delay_slot
+ 11418 0x11 0x01 0x00 0x98 ADD r0, r4, r16
+.delay_slot
+.swstall delay_slot
+ 11422 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14addFloat32Sigsjji_400
+ 11440 0x00 0x16 0x90 0x40 0x01 0x84 JNZ r0, #11552
+.delay_slot
+.swstall delay_slot
+ 11446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11450 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11452 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11454 0x00 0x00 NOPX
+ 11456 0xd8 0x16 0xa8 0x00 0x01 0x84 JZ r27, #11600
+.delay_slot
+.swstall delay_slot
+ 11462 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11470 0x00 0x00 NOPX
+ 11472 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11476 0x00 0x16 0x38 0x00 0x00 0x84 J #11376
+.delay_slot
+ 11482 0x40 0x00 0x08 0xa0 0x00 0x44 MOVXM r17, #1073741824
+.delay_slot
+ 11488 0x9c 0x62 0x11 0x3b 0x41 0xe4 ADD r17, r19, r17; MOV r2, r27
+.delay_slot
+ 11494 0x14 0x47 0x00 0x98 ADD r3, r17, r16
+.delay_slot
+.swstall delay_slot
+ 11498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11500 0x00 0x01 0x67 0x98 NOPA
+.label TGT_F_ZL14addFloat32Sigsjji_464
+ 11504 0x98 0x16 0xb8 0x40 0x01 0x84 JNZ r19, #11632
+.delay_slot
+.swstall delay_slot
+ 11510 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11512 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11514 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11516 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11518 0x00 0x00 NOPX
+ 11520 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11524 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 11528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11534 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.tail_call
+ 11536 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11542 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11544 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11548 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11550 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.return_address
+ 11552 0x14 0xe1 0x05 0x98 OR r16, r19, r16
+ 11556 0x80 0x16 0xc0 0x40 0x01 0x84 JNZ r16, #11648
+.delay_slot
+.swstall delay_slot
+ 11562 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11564 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11566 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11568 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11570 0x00 0x00 NOPX
+ 11572 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11576 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 11580 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11582 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11584 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11586 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14addFloat32Sigsjji_560
+ 11600 0x05 0x00 0x08 0x33 0x82 0xa4 RET lr; ADD.NC r16, r19, r16
+.delay_slot
+ 11606 0x17 0xe2 0xe9 0x18 MOVX r17, #-6
+.delay_slot
+ 11610 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17
+.delay_slot
+ 11614 0x11 0x01 0x00 0x98 ADD r0, r4, r16
+.delay_slot
+.swstall delay_slot
+ 11618 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.tail_call
+ 11632 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11638 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11640 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11642 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11646 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.tail_call
+.return_address
+ 11648 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11654 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11658 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11660 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11662 0x00 0x00 NOPX
+.label _ZL14addFloat32Sigsjji__end
+.label _ZL14subFloat32Sigsjji
+.function_start
+ 11664 0xfd 0x31 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r17, #-23; MOVXM r16, #8388607
+ 11674 0x10 0x89 0x1d 0x98 LSHL r4, r2, r17
+ 11678 0x10 0x65 0x1d 0x98 LSHL r18, r1, r17
+ 11682 0x10 0x69 0x04 0x98 AND r20, r1, r16
+ 11686 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4
+ 11690 0x14 0xb6 0x90 0x18 EXTEND.u8 r27, r18
+ 11694 0x14 0x20 0x99 0xa0 0x1d 0x64 AND r16, r2, r16; MOV r19, #7
+ 11700 0x15 0x23 0x3d 0x98 LSHL r17, r20, r19
+ 11704 0xdc 0xb2 0x3c 0x20 0x01 0x64 SUB r18, r27, r25; MOV r24, #0
+ 11710 0x16 0x0b 0x2a 0x98 LT r5, r24, r18
+ 11714 0x28 0x17 0x40 0x40 0x01 0x84 JNZ r5, #11904
+.delay_slot
+ 11720 0x14 0x21 0x3d 0x98 LSHL r16, r16, r19
+.delay_slot
+ 11724 0x1f 0xe0 0x00 0x10 0x00 0x00 0x88 0x00 0x10 0xba MOVA r0, #255; MOVXM r4, #1073741824
+.delay_slot
+ 11734 0x16 0xe8 0x07 0x98 EQ r20, r27, r0
+.delay_slot
+ 11738 0x14 0x66 0x45 0x98 OR r19, r17, r4
+.delay_slot
+ 11742 0x11 0x09 0x05 0x98 OR r4, r4, r16
+ 11746 0x14 0x8d 0x89 0x98 GE r6, r18, r24
+ 11750 0x30 0x17 0x90 0x40 0x01 0x84 JNZ r6, #12064
+.delay_slot
+ 11756 0x10 0x0a 0x05 0x18 MOVX r5, #1
+.delay_slot
+ 11760 0x10 0xce 0x56 0x98 XOR r7, r3, r5
+.delay_slot
+.swstall delay_slot
+ 11764 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11766 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11768 0x00 0x00 NOPX
+ 11770 0x16 0x68 0x07 0x98 EQ r20, r25, r0
+ 11774 0xa0 0x17 0xc8 0x40 0x01 0x84 JNZ r20, #12176
+.delay_slot
+.swstall delay_slot
+ 11780 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11782 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11788 0x00 0x00 NOPX
+ 11790 0x18 0x53 0xa0 0xf8 MOV r1, r7
+ 11794 0x14 0xa0 0x07 0x18 ADD r16, r18, #1
+ 11798 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27
+ 11802 0x16 0x21 0x01 0x98 SUB r16, r24, r16
+ 11806 0x80 0x17 0x30 0x00 0x01 0x84 JZ r16, #11872
+.delay_slot
+ 11812 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27
+.delay_slot
+.swstall delay_slot
+ 11816 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11818 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11820 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11822 0x00 0x00 NOPX
+ 11824 0x04 0x14 0x00 0x30 0x38 0x0e 0x48 0x1f 0x58 0xba MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31
+ 11834 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11838 0x14 0x65 0x2d 0x98 LSHL r18, r17, r18
+ 11842 0x14 0x66 0xf0 0x18 NEZ r19, r17
+ 11846 0x14 0x37 0x4a 0x98 LT r27, r16, r20
+ 11850 0x14 0x62 0x3d 0x98 LSHL r17, r17, r3
+ 11854 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+ 11858 0x14 0x61 0x25 0x98 OR r16, r17, r18
+ 11862 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0xe3 0x02 0x7a NOPA; NOPS; SEL.EQZ r17, r19, r16, r27
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 0x00 0x17 0x80 0x00 0x00 0x84 J #12032
+.delay_slot
+ 11878 0x11 0x07 0x11 0x98 SUB r3, r4, r17
+.delay_slot
+.swstall delay_slot
+ 11882 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11884 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11886 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_240
+ 11904 0xa0 0x17 0xe0 0x40 0x01 0x84 JNZ r20, #12224
+.delay_slot
+.swstall delay_slot
+ 11910 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11912 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11914 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11916 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11918 0x00 0x00 NOPX
+ 11920 0x18 0x1d 0xa0 0xf8 MOV r0, r27
+ 11924 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11928 0x1e 0xdc 0xa0 0xf8 MOV r27, r25
+ 11932 0x1e 0x50 0x20 0xf8 MOV r25, r0
+ 11936 0x14 0xa3 0xff 0x18 ADD r17, r18, #-1
+ 11940 0x14 0x63 0x22 0x18 SEL.EQZ r17, r17, r18, r27
+ 11944 0x88 0x17 0x78 0x00 0x01 0x84 JZ r17, #12016
+.delay_slot
+ 11950 0x14 0x20 0x42 0x18 SEL.EQZ r16, r16, r4, r27
+.delay_slot
+.swstall delay_slot
+ 11954 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11956 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11958 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11960 0x00 0x00 NOPX
+ 11962 0x04 0x03 0x00 0x31 0x28 0x8e 0x88 0x1f 0x58 0xba MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31
+ 11972 0x14 0xa9 0x44 0x98 AND r20, r18, r20
+ 11976 0x14 0x29 0x4d 0x98 LSHL r20, r16, r20
+ 11980 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18
+ 11984 0x14 0x76 0x3a 0x98 LT r27, r17, r3
+ 11988 0x15 0x28 0xf0 0x18 NEZ r20, r20
+ 11992 0x14 0x20 0xf0 0x18 NEZ r16, r16
+ 11996 0x14 0xa3 0x45 0x98 OR r17, r18, r20
+ 12000 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV
+.label __ll1__ZL14subFloat32Sigsjji
+ 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x26 0x38 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.tail_call
+ 12032 0x00 0x15 0x40 0x00 0x00 0x84 J #10880
+.delay_slot
+ 12038 0x16 0x45 0xff 0x18 ADD r2, r25, #-1
+.delay_slot
+.swstall delay_slot
+ 12042 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12044 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12046 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.return_address
+ 12064 0xa0 0x17 0xf0 0x40 0x01 0x84 JNZ r20, #12256
+.delay_slot
+.swstall delay_slot
+ 12070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12072 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12074 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12076 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12078 0x00 0x00 NOPX
+ 12080 0x14 0x27 0x1c 0x98 LTU r19, r16, r17
+ 12084 0x98 0x18 0x08 0x40 0x01 0x84 JNZ r19, #12304
+.delay_slot
+ 12090 0x11 0x71 0x92 0x18 SEL.EQZ r24, r5, r25, r27
+.delay_slot
+ 12094 0x1c 0x9d 0xa0 0xf8 MOV r18, r27
+.delay_slot
+ 12098 0x11 0x73 0x22 0x18 SEL.EQZ r25, r5, r18, r27
+.delay_slot
+.swstall delay_slot
+ 12102 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12104 0x00 0x00 NOPX
+ 12106 0x14 0x65 0x0c 0x98 LTU r18, r17, r16
+ 12110 0x90 0x18 0x18 0x40 0x01 0x84 JNZ r18, #12336
+.delay_slot
+.swstall delay_slot
+ 12116 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12118 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12120 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12122 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12124 0x00 0x00 NOPX
+ 12126 0x10 0x20 0x7d 0x18 MOVX r16, #31
+ 12130 0x00 0x07 0xc0 0xc9 0xe8 0x44 MOVXM p0, #509172
+ 12136 0x00 0x06 0x56 0x98 LDA r18, [p0]
+.swstall __RAW__R_1948
+ 12140 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 12142 0x00 0x00 NOPX
+ 12144 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12148 0x10 0x22 0x0d 0x18 MOVX r17, #3
+.delay_slot
+.swstall delay_slot
+ 12152 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12154 0x00 0x00 NOPX
+.delay_slot
+ 12156 0x14 0x63 0x27 0x98 EQ r17, r17, r18
+.delay_slot
+ 12160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x08 0x6c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_512
+ 12176 0x80 0x18 0x28 0x40 0x01 0x84 JNZ r16, #12368
+.delay_slot
+.swstall delay_slot
+ 12182 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12184 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12186 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12188 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12190 0x00 0x00 NOPX
+ 12192 0x10 0x20 0x7d 0x18 MOVX r16, #31
+ 12196 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12200 0x11 0xe1 0x0d 0x98 LSHL r16, r7, r16
+.delay_slot
+ 12204 0x7f 0x80 0x08 0xa0 0x00 0x44 MOVXM r17, #2139095040
+.delay_slot
+ 12210 0x14 0x41 0x00 0x98 ADD r0, r17, r16
+.delay_slot
+.swstall delay_slot
+ 12214 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12216 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_560
+ 12224 0x88 0x18 0x30 0x40 0x01 0x84 JNZ r17, #12384
+.delay_slot
+.swstall delay_slot
+ 12230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12232 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12234 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12236 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12238 0x00 0x00 NOPX
+ 12240 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12244 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 12248 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12250 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12252 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12254 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_592
+ 12256 0x14 0x61 0x05 0x98 OR r16, r17, r16
+ 12260 0x80 0x18 0x38 0x40 0x01 0x84 JNZ r16, #12400
+.delay_slot
+.swstall delay_slot
+ 12266 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12268 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12270 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12272 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12274 0x00 0x00 NOPX
+ 12276 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12280 0x7f 0xff 0xf0 0x3f 0xfe 0x44 MOVXM r0, #2147483647
+.delay_slot
+.swstall delay_slot
+ 12286 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12288 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 0x00 0x17 0x78 0x00 0x00 0x84 J #12016
+.delay_slot
+ 12310 0x18 0x51 0xa0 0xf8 MOV r1, r3
+.delay_slot
+ 12314 0x1c 0xd8 0xa0 0xf8 MOV r19, r17
+.delay_slot
+.swstall delay_slot
+ 12318 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12320 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12322 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 0x00 0x17 0x30 0x00 0x00 0x84 J #11872
+.delay_slot
+ 12342 0x19 0x18 0x20 0xf8 MOV r4, r16
+.delay_slot
+ 12346 0x1e 0x5c 0x20 0xf8 MOV r25, r24
+.delay_slot
+ 12350 0x18 0x53 0xa0 0xf8 MOV r1, r7
+.delay_slot
+.swstall delay_slot
+ 12354 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12356 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.tail_call
+ 12368 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12374 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12376 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12378 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12380 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12382 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.tail_call
+.return_address
+ 12384 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12390 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12394 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12396 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12398 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.tail_call
+.return_address
+ 12400 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12406 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12410 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12414 0x00 0x00 NOPX
+.label _ZL14subFloat32Sigsjji__end
+.label float32_add
+.function_start
+ 12416 0x17 0xe0 0x85 0x18 MOVX r16, #-31
+ 12420 0x10 0x47 0x0d 0x98 LSHL r3, r1, r16
+ 12424 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16
+ 12428 0x10 0xe1 0x07 0x98 EQ r16, r3, r16
+ 12432 0x80 0x18 0x58 0x40 0x01 0x84 JNZ r16, #12464
+.delay_slot
+.swstall delay_slot
+ 12438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12444 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12446 0x00 0x00 NOPX
+.tail_call
+ 12448 0x00 0x16 0xc8 0x00 0x00 0x84 J #11664
+.delay_slot
+.swstall delay_slot
+ 12454 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12456 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12458 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12460 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12462 0x00 0x00 NOPX
+.label TGT_Ffloat32_add_48
+.tail_call
+.return_address
+ 12464 0x00 0x15 0x90 0x00 0x00 0x84 J #11040
+.delay_slot
+.swstall delay_slot
+ 12470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12474 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12476 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12478 0x00 0x00 NOPX
+.label float32_add__end
+
+.data_segment DMb 508992
+.label reduce_mean_c8_params
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x7
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509120 40
+
+.data_segment DMb 509160
+.label _ZL8num_iter
+ 0x1
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509164 4
+
+.bss_segment DMb 509168 1
+
+.bss_segment DMb 509172 4
+
+.bss_segment DMb 509184 64
+
+.stack DM_stack 506560 508928
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.map
new file mode 100644
index 0000000000000000000000000000000000000000..a0123fcd2abb0ee7d6fe767c4cfeb9204c35f584
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.map
@@ -0,0 +1,177 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+Memory map for memory 'DM_stack':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2368
+
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+
+Memory map for memory 'DMb':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2613
+
+ 0x00000000..0x0007babf ( 506560 items) : Reserved
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+ 0x0007c400..0x0007c43f ( 64 items) : Reserved
+ 0x0007c440..0x0007c4bf ( 128 items) : ../Release/0_0_reloadable2.o::reduce_mean_c8_params (Data, Global, .data.DMb.64)
+ 0x0007c4c0..0x0007c4c3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9curr_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4c4..0x0007c4c7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4c8..0x0007c4cb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8core_row (Data, Local, .bss.DMb.4)
+ 0x0007c4cc..0x0007c4cf ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4)
+ 0x0007c4d0..0x0007c4d3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11reduce_axis (Data, Local, .bss.DMb.4)
+ 0x0007c4d4..0x0007c4d7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_width (Data, Local, .bss.DMb.4)
+ 0x0007c4d8..0x0007c4db ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9l3_height (Data, Local, .bss.DMb.4)
+ 0x0007c4dc..0x0007c4df ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_depth (Data, Local, .bss.DMb.4)
+ 0x0007c4e0..0x0007c4e3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10width_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4e4..0x0007c4e7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11height_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4e8..0x0007c4eb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8num_iter (Data, Local, .data.DMb.4)
+ 0x0007c4ec..0x0007c4ef ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4)
+ 0x0007c4f0..0x0007c4f0 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1)
+ 0x0007c4f4..0x0007c4f7 ( 4 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float_rounding_mode (Data, Global, .bss.DMb.4)
+ 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable2.o::pad_3d_params (Data, Global, .bss.DMb.64)
+ 0x0007ccc0..0x000fffff ( 537408 items) : Reserved
+
+Memory map for memory 'PM':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 10058
+
+ 0x00000000..0x0000092f ( 2352 items) : Reserved
+ 0x00000930..0x00000a0b ( 220 items) : ../Release/0_0_reloadable2.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00000a10..0x00001043 ( 1588 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+ 0x00001050..0x000012ed ( 670 items) : ../Release/0_0_reloadable2.o::_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t (Function, Weak, .text) (stack frame size = 0)
+ 0x000012f0..0x00001d67 ( 2680 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E (Function, Weak, .text) (stack frame size = 256)
+
+ Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001d70..0x000027e3 ( 2676 items) : ../Release/0_0_reloadable2.o::_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128)
+
+ Called functions : _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ int32_to_float32
+ float32_add
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_satE
+ _ZN12me_primitive11control_rndE
+ reduce_mean_c8_params
+ _ZL11reduce_axis
+ _ZL11ifm1_offset
+ pad_3d_params
+ _ZL8num_iter
+ _ZL8l3_width
+ _ZL9l3_height
+ _ZL8l3_depth
+ _ZL10depth_iter
+ _ZL10width_iter
+ _ZL11height_iter
+
+ 0x000027f0..0x0000287d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0)
+ 0x00002880..0x000028ef ( 112 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19propagateFloat32NaNjj (Function, Local, .text) (stack frame size = 0)
+ 0x000028f0..0x00002a77 ( 392 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19roundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0)
+
+ Referenced symbols: float_rounding_mode
+
+ 0x00002a80..0x00002a97 ( 24 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL28normalizeRoundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL19roundAndPackFloat32iij
+
+ 0x00002aa0..0x00002b11 ( 114 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::int32_to_float32 (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _ZL28normalizeRoundAndPackFloat32iij
+
+ 0x00002b20..0x00002d8f ( 624 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14addFloat32Sigsjji (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL19roundAndPackFloat32iij
+ _ZL19propagateFloat32NaNjj
+
+ 0x00002d90..0x0000307f ( 752 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14subFloat32Sigsjji (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL28normalizeRoundAndPackFloat32iij
+ _ZL19propagateFloat32NaNjj
+
+ Referenced symbols: float_rounding_mode
+
+ 0x00003080..0x000030bf ( 64 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float32_add (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _ZL14subFloat32Sigsjji
+ _ZL14addFloat32Sigsjji
+
+
+External symbols:
+
+ __dso_handle = 0x0
+ _ctors_end = 0x0
+ _ctors_start = 0x0
+ _dtors_end = 0x0
+ _dtors_start = 0x0
+ _pc_end = 0x30c0
+ _pc_start = 0x930
+ _sp_end_DM_stack = 0x7c400
+ _sp_start_DM_stack = 0x7bac0
+
+Section summary for memory 'DM_stack':
+
+ .stack File
+ ---------- ----------
+ 2368
+ ---------- ----------
+ 2368 Total
+
+Section summary for memory 'DMb':
+
+ .bss .data File
+ ---------- ---------- ----------
+ 104 132 ../Release/0_0_reloadable2.o
+ 4 0 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ 5 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ---------- ----------
+ 113 132 Total
+
+Section summary for memory 'PM':
+
+ .text File
+ ---------- ----------
+ 7834 ../Release/0_0_reloadable2.o
+ 2082 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ----------
+ 10058 Total
+
+File summary:
+
+../Release/0_0_reloadable2.o
+ DMb 236
+ PM 7834
+
+me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ DMb 5
+
+softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ DMb 4
+ PM 2082
+
+me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ PM 142
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.sdr
new file mode 100644
index 0000000000000000000000000000000000000000..efa1bd1f1f0feebb4e1aac96628ff9f168810f9e
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.sdr
@@ -0,0 +1,96 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+// Symbols in memory 'DM_bankA':
+// Symbols in memory 'DM_bankAB':
+// Symbols in memory 'DM_bankAC':
+// Symbols in memory 'DM_bankAD':
+// Symbols in memory 'DM_bankB':
+// Symbols in memory 'DM_bankBC':
+// Symbols in memory 'DM_bankBD':
+// Symbols in memory 'DM_bankC':
+// Symbols in memory 'DM_bankCD':
+// Symbols in memory 'DM_bankD':
+// Symbols in memory 'DM_stack':
+// Symbols in memory 'DM_test':
+// Symbols in memory 'DMb':
+_symbol reduce_mean_c8_params 0x0007c440
+_symbol _ZN12me_primitive11control_satE 0x0007c4ec
+_symbol _ZN12me_primitive11control_rndE 0x0007c4f0
+_symbol float_rounding_mode 0x0007c4f4
+_symbol pad_3d_params 0x0007c500
+// Symbols in memory 'DMh':
+// Symbols in memory 'DMh_bankA':
+// Symbols in memory 'DMh_bankAB':
+// Symbols in memory 'DMh_bankAC':
+// Symbols in memory 'DMh_bankAD':
+// Symbols in memory 'DMh_bankB':
+// Symbols in memory 'DMh_bankBC':
+// Symbols in memory 'DMh_bankBD':
+// Symbols in memory 'DMh_bankC':
+// Symbols in memory 'DMh_bankCD':
+// Symbols in memory 'DMh_bankD':
+// Symbols in memory 'DMh_stack':
+// Symbols in memory 'DMs':
+// Symbols in memory 'DMs_bankA':
+// Symbols in memory 'DMs_bankAB':
+// Symbols in memory 'DMs_bankAC':
+// Symbols in memory 'DMs_bankAD':
+// Symbols in memory 'DMs_bankB':
+// Symbols in memory 'DMs_bankBC':
+// Symbols in memory 'DMs_bankBD':
+// Symbols in memory 'DMs_bankC':
+// Symbols in memory 'DMs_bankCD':
+// Symbols in memory 'DMs_bankD':
+// Symbols in memory 'DMs_stack':
+// Symbols in memory 'DMv':
+// Symbols in memory 'DMv_bankA':
+// Symbols in memory 'DMv_bankAB':
+// Symbols in memory 'DMv_bankAC':
+// Symbols in memory 'DMv_bankAD':
+// Symbols in memory 'DMv_bankB':
+// Symbols in memory 'DMv_bankBC':
+// Symbols in memory 'DMv_bankBD':
+// Symbols in memory 'DMv_bankC':
+// Symbols in memory 'DMv_bankCD':
+// Symbols in memory 'DMv_bankD':
+// Symbols in memory 'DMv_stack':
+// Symbols in memory 'DMw':
+// Symbols in memory 'DMw_bankA':
+// Symbols in memory 'DMw_bankAB':
+// Symbols in memory 'DMw_bankAC':
+// Symbols in memory 'DMw_bankAD':
+// Symbols in memory 'DMw_bankB':
+// Symbols in memory 'DMw_bankBC':
+// Symbols in memory 'DMw_bankBD':
+// Symbols in memory 'DMw_bankC':
+// Symbols in memory 'DMw_bankCD':
+// Symbols in memory 'DMw_bankD':
+// Symbols in memory 'DMw_stack':
+// Symbols in memory 'DMx':
+// Symbols in memory 'DMx_bankA':
+// Symbols in memory 'DMx_bankAB':
+// Symbols in memory 'DMx_bankAC':
+// Symbols in memory 'DMx_bankAD':
+// Symbols in memory 'DMx_bankB':
+// Symbols in memory 'DMx_bankBC':
+// Symbols in memory 'DMx_bankBD':
+// Symbols in memory 'DMx_bankC':
+// Symbols in memory 'DMx_bankCD':
+// Symbols in memory 'DMx_bankD':
+// Symbols in memory 'DMx_stack':
+// Symbols in memory 'PM':
+_symbol _Z13kernelWrapperPPvjjjj 0x00000930
+_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv 0x00000a10
+_symbol _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t 0x00001050
+_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E 0x000012f0
+_symbol _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001d70
+_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000027f0
+_symbol int32_to_float32 0x00002aa0
+_symbol float32_add 0x00003080
+// Symbols in memory 'PMw':
+// Symbols in memory 'TM4':
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.srv
new file mode 100644
index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.srv
@@ -0,0 +1,14427 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable2.cc" 29 first
+.src_ref 0 "0_0_reloadable2.cc" 31 60 first
+.function_start
+ 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00101111" // /* MW 4 */
+ 2355 "11010000" // /* MW 3 */
+ 2356 "11000010" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 29
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 31 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "01010000" // /* MW 6 */
+ 2367 "11101000" // /* MW 5 */
+ 2368 "00000001" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "01110011" // /* MW 2 */
+ 2371 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79
+.src_ref 0 "0_0_reloadable2.cc" 31 110 first
+ 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2373 "01111001" // /* MW 9 */
+ 2374 "01100000" // /* MW 8 */
+ 2375 "10110000" // /* MW 7 */
+ 2376 "10000011" // /* MW 6 */
+ 2377 "10100111" // /* MW 5 */
+ 2378 "00011111" // /* MW 4 */
+ 2379 "10110000" // /* MW 3 */
+ 2380 "10000010" // /* MW 2 */
+ 2381 "11111111" // /* MW 1 */
+ 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2383 "00111101" // /* MW 3 */
+ 2384 "11110100" // /* MW 2 */
+ 2385 "00001111" // /* MW 1 */
+ 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2387 "00000000" // /* MW 1 */
+ 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2389 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2391 "00000010" // /* MW 3 */
+ 2392 "01101000" // /* MW 2 */
+ 2393 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2395 "00010110" // /* MW 3 */
+ 2396 "00011110" // /* MW 2 */
+ 2397 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2399 "01010110" // /* MW 3 */
+ 2400 "00111110" // /* MW 2 */
+ 2401 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2403 "00110110" // /* MW 3 */
+ 2404 "11101110" // /* MW 2 */
+ 2405 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2407 "01110110" // /* MW 3 */
+ 2408 "00000111" // /* MW 2 */
+ 2409 "00000000" // /* MW 1 */
+ 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2411 "00000000" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2423 "00100010" // /* MW 3 */
+ 2424 "00100001" // /* MW 2 */
+ 2425 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2427 "00010001" // /* MW 3 */
+ 2428 "11010110" // /* MW 2 */
+ 2429 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2431 "11111101" // /* MW 3 */
+ 2432 "11100000" // /* MW 2 */
+ 2433 "00010111" // /* MW 1 */
+ 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2435 "00000000" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2441 "00001000" // /* MW 3 */
+ 2442 "01010111" // /* MW 2 */
+ 2443 "00010100" // /* MW 1 */
+ 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2445 "00000000" // /* MW 1 */
+ 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2447 "00000000" // /* MW 1 */
+ 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2449 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79 first
+ 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00011110" // /* MW 3 */
+ 2452 "00101100" // /* MW 2 */
+ 2453 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 18 47 first
+ 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "10011110" // /* MW 3 */
+ 2456 "11111100" // /* MW 2 */
+ 2457 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 19 81 first
+ 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2459 "00011110" // /* MW 3 */
+ 2460 "00000101" // /* MW 2 */
+ 2461 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 16 4 first
+.no_stack_arguments
+ 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */
+ 2463 "00000001" // /* MW 5 */
+ 2464 "00000000" // /* MW 4 */
+ 2465 "10111000" // /* MW 3 */
+ 2466 "00001110" // /* MW 2 */
+ 2467 "00000000" // /* MW 1 */
+.delay_slot
+ 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2469 "01010101" // /* MW 3 */
+ 2470 "11110011" // /* MW 2 */
+ 2471 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2479 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 38 60 first
+.return_address
+ 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2481 "00010110" // /* MW 3 */
+ 2482 "11110110" // /* MW 2 */
+ 2483 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2485 "01010001" // /* MW 3 */
+ 2486 "11110011" // /* MW 2 */
+ 2487 "00000111" // /* MW 1 */
+ 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2489 "00000000" // /* MW 1 */
+ 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2491 "00000000" // /* MW 1 */
+ 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2493 "00000000" // /* MW 1 */
+ 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2495 "00000000" // /* MW 1 */
+ 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2497 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2499 "00001000" // /* MW 3 */
+ 2500 "01101000" // /* MW 2 */
+ 2501 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2503 "00010110" // /* MW 3 */
+ 2504 "00000110" // /* MW 2 */
+ 2505 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2507 "00000101" // /* MW 3 */
+ 2508 "00100010" // /* MW 2 */
+ 2509 "00010000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+ 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2517 "00000000" // /* MW 1 */
+ 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2519 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "00011000" // /* MW 3 */
+ 2522 "00010101" // /* MW 2 */
+ 2523 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2525 "01000001" // /* MW 5 */
+ 2526 "10101111" // /* MW 4 */
+ 2527 "00101101" // /* MW 3 */
+ 2528 "10000111" // /* MW 2 */
+ 2529 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2531 "00010110" // /* MW 3 */
+ 2532 "11110110" // /* MW 2 */
+ 2533 "00000000" // /* MW 1 */
+ 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2535 "10011001" // /* MW 3 */
+ 2536 "11111011" // /* MW 2 */
+ 2537 "00000111" // /* MW 1 */
+ 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2539 "00000000" // /* MW 1 */
+ 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "11110001" // /* MW 3 */
+ 2542 "11111101" // /* MW 2 */
+ 2543 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41 first
+ 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000001" // /* MW 5 */
+ 2546 "00000000" // /* MW 4 */
+ 2547 "00000000" // /* MW 3 */
+ 2548 "11111000" // /* MW 2 */
+ 2549 "11111111" // /* MW 1 */
+ 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2551 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+ 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2553 "00000000" // /* MW 3 */
+ 2554 "00101000" // /* MW 2 */
+ 2555 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2557 "00000001" // /* MW 3 */
+ 2558 "01100011" // /* MW 2 */
+ 2559 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2561 "00010010" // /* MW 3 */
+ 2562 "00100001" // /* MW 2 */
+ 2563 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2565 "00010001" // /* MW 3 */
+ 2566 "11110110" // /* MW 2 */
+ 2567 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2571 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 218 first
+.src_ref 2 "reduce_base_c8.h" 220 27 first
+.src_ref 2 "reduce_base_c8.h" 290 63
+.src_ref 2 "reduce_base_c8.h" 348 46
+.function_start
+ 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2577 "01111000" // /* MW 11 */
+ 2578 "01100000" // /* MW 10 */
+ 2579 "00001001" // /* MW 9 */
+ 2580 "01101000" // /* MW 8 */
+ 2581 "01100111" // /* MW 7 */
+ 2582 "00111110" // /* MW 6 */
+ 2583 "10001011" // /* MW 5 */
+ 2584 "10000000" // /* MW 4 */
+ 2585 "11010011" // /* MW 3 */
+ 2586 "10001110" // /* MW 2 */
+ 2587 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 348 46 first
+ 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2589 "00001000" // /* MW 9 */
+ 2590 "00000111" // /* MW 8 */
+ 2591 "00110000" // /* MW 7 */
+ 2592 "00001001" // /* MW 6 */
+ 2593 "00100101" // /* MW 5 */
+ 2594 "00111110" // /* MW 4 */
+ 2595 "00000000" // /* MW 3 */
+ 2596 "00000111" // /* MW 2 */
+ 2597 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 293 77
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 299 40
+.src_ref 2 "reduce_base_c8.h" 300 59
+.src_ref 2 "reduce_base_c8.h" 326 79
+ 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2599 "01111000" // /* MW 9 */
+ 2600 "01100000" // /* MW 8 */
+ 2601 "00001000" // /* MW 7 */
+ 2602 "10101000" // /* MW 6 */
+ 2603 "00010111" // /* MW 5 */
+ 2604 "00111110" // /* MW 4 */
+ 2605 "00000000" // /* MW 3 */
+ 2606 "01111110" // /* MW 2 */
+ 2607 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57
+.src_ref 2 "reduce_base_c8.h" 301 81
+.src_ref 2 "reduce_base_c8.h" 305 77
+ 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2609 "00010000" // /* MW 9 */
+ 2610 "11111100" // /* MW 8 */
+ 2611 "10001111" // /* MW 7 */
+ 2612 "00111100" // /* MW 6 */
+ 2613 "00000000" // /* MW 5 */
+ 2614 "00000000" // /* MW 4 */
+ 2615 "00000000" // /* MW 3 */
+ 2616 "11100101" // /* MW 2 */
+ 2617 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 218
+.src_ref 2 "reduce_base_c8.h" 280 76
+.src_ref 2 "reduce_base_c8.h" 312 98
+ 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2619 "01110000" // /* MW 9 */
+ 2620 "00000000" // /* MW 8 */
+ 2621 "00000000" // /* MW 7 */
+ 2622 "00000000" // /* MW 6 */
+ 2623 "00000010" // /* MW 5 */
+ 2624 "00000000" // /* MW 4 */
+ 2625 "00000000" // /* MW 3 */
+ 2626 "10010000" // /* MW 2 */
+ 2627 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+ 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2629 "00010111" // /* MW 3 */
+ 2630 "01100000" // /* MW 2 */
+ 2631 "00011100" // /* MW 1 */
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 220 25 first
+ 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "01110001" // /* MW 3 */
+ 2636 "00011100" // /* MW 2 */
+ 2637 "00001000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 28 first
+ 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "01010110" // /* MW 3 */
+ 2640 "00011111" // /* MW 2 */
+ 2641 "00000001" // /* MW 1 */
+ 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2643 "00000000" // /* MW 1 */
+ 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2645 "00000000" // /* MW 1 */
+ 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2647 "00000000" // /* MW 1 */
+ 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2649 "00000000" // /* MW 1 */
+ 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2651 "00000000" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 26
+.src_ref 2 "reduce_base_c8.h" 301 81 first
+ 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2655 "10001001" // /* MW 5 */
+ 2656 "01000100" // /* MW 4 */
+ 2657 "00111101" // /* MW 3 */
+ 2658 "11101010" // /* MW 2 */
+ 2659 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 26 first
+.src_ref 2 "reduce_base_c8.h" 293 58 first
+.src_ref 2 "reduce_base_c8.h" 301 81
+ 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2661 "10101000" // /* MW 9 */
+ 2662 "01001000" // /* MW 8 */
+ 2663 "11001100" // /* MW 7 */
+ 2664 "01111110" // /* MW 6 */
+ 2665 "01001101" // /* MW 5 */
+ 2666 "00000110" // /* MW 4 */
+ 2667 "11010000" // /* MW 3 */
+ 2668 "11110110" // /* MW 2 */
+ 2669 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 63 first
+ 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2671 "01101101" // /* MW 3 */
+ 2672 "10100100" // /* MW 2 */
+ 2673 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 77 first
+ 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2675 "00011101" // /* MW 3 */
+ 2676 "00001100" // /* MW 2 */
+ 2677 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 41 first
+.src_ref 2 "reduce_base_c8.h" 300 59 first
+ 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2679 "11111111" // /* MW 5 */
+ 2680 "10110010" // /* MW 4 */
+ 2681 "10110000" // /* MW 3 */
+ 2682 "01000011" // /* MW 2 */
+ 2683 "11010100" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 24 first
+.src_ref 2 "reduce_base_c8.h" 287 40 first
+ 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2691 "01001100" // /* MW 5 */
+ 2692 "10011100" // /* MW 4 */
+ 2693 "00111110" // /* MW 3 */
+ 2694 "11110110" // /* MW 2 */
+ 2695 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 29 first
+.src_ref 2 "reduce_base_c8.h" 312 60 first
+ 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2697 "10101111" // /* MW 9 */
+ 2698 "01001001" // /* MW 8 */
+ 2699 "00000111" // /* MW 7 */
+ 2700 "10000000" // /* MW 6 */
+ 2701 "10110101" // /* MW 5 */
+ 2702 "11111111" // /* MW 4 */
+ 2703 "11010111" // /* MW 3 */
+ 2704 "10001010" // /* MW 2 */
+ 2705 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 57 first
+ 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11011111" // /* MW 3 */
+ 2708 "11101001" // /* MW 2 */
+ 2709 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 78 first
+ 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2711 "01001111" // /* MW 3 */
+ 2712 "11111000" // /* MW 2 */
+ 2713 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 40 first
+ 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2715 "11101101" // /* MW 3 */
+ 2716 "01101011" // /* MW 2 */
+ 2717 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57 first
+.src_ref 2 "reduce_base_c8.h" 299 40
+ 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2719 "11010000" // /* MW 5 */
+ 2720 "10110101" // /* MW 4 */
+ 2721 "10111101" // /* MW 3 */
+ 2722 "10001011" // /* MW 2 */
+ 2723 "11101100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 41
+ 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "11111111" // /* MW 3 */
+ 2726 "10101111" // /* MW 2 */
+ 2727 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 85 first
+ 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "01101111" // /* MW 3 */
+ 2730 "01111011" // /* MW 2 */
+ 2731 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 27 first
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2733 "01010101" // /* MW 5 */
+ 2734 "01100000" // /* MW 4 */
+ 2735 "00111111" // /* MW 3 */
+ 2736 "10001010" // /* MW 2 */
+ 2737 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 33 first
+ 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2739 "00101110" // /* MW 3 */
+ 2740 "00011100" // /* MW 2 */
+ 2741 "00000001" // /* MW 1 */
+ 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2743 "00000000" // /* MW 1 */
+ 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2745 "00000000" // /* MW 1 */
+ 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2747 "00000000" // /* MW 1 */
+ 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2749 "00000000" // /* MW 1 */
+ 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2751 "00000000" // /* MW 1 */
+ 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2753 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 31
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2755 "01110000" // /* MW 7 */
+ 2756 "00001110" // /* MW 6 */
+ 2757 "11110000" // /* MW 5 */
+ 2758 "00000011" // /* MW 4 */
+ 2759 "00110000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 34 first
+ 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2763 "00001110" // /* MW 3 */
+ 2764 "00000100" // /* MW 2 */
+ 2765 "00000001" // /* MW 1 */
+ 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2767 "00000000" // /* MW 1 */
+ 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2769 "00000000" // /* MW 1 */
+ 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2771 "00000000" // /* MW 1 */
+ 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2773 "00000000" // /* MW 1 */
+ 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2775 "00000000" // /* MW 1 */
+ 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2777 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 32
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2779 "01110000" // /* MW 7 */
+ 2780 "10001110" // /* MW 6 */
+ 2781 "00110000" // /* MW 5 */
+ 2782 "00000011" // /* MW 4 */
+ 2783 "00110000" // /* MW 3 */
+ 2784 "10000001" // /* MW 2 */
+ 2785 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 32 first
+ 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2787 "11010110" // /* MW 3 */
+ 2788 "00010111" // /* MW 2 */
+ 2789 "00000001" // /* MW 1 */
+ 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2791 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */
+ 2793 "00000001" // /* MW 5 */
+ 2794 "01000000" // /* MW 4 */
+ 2795 "10110000" // /* MW 3 */
+ 2796 "00000101" // /* MW 2 */
+ 2797 "11000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 98 first
+.delay_slot
+ 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2799 "00001101" // /* MW 3 */
+ 2800 "00100111" // /* MW 2 */
+ 2801 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 318 64 first
+.delay_slot
+ 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2803 "10011111" // /* MW 3 */
+ 2804 "11110011" // /* MW 2 */
+ 2805 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 41 first
+.src_ref 2 "reduce_base_c8.h" 305 77 first
+.delay_slot
+ 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "11111111" // /* MW 5 */
+ 2808 "10110001" // /* MW 4 */
+ 2809 "10110010" // /* MW 3 */
+ 2810 "00001011" // /* MW 2 */
+ 2811 "10100101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 76 first
+.delay_slot
+ 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2813 "00001101" // /* MW 3 */
+ 2814 "00100001" // /* MW 2 */
+ 2815 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 30 first
+.src_ref 2 "reduce_base_c8.h" 318 88 first
+.delay_slot
+ 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2817 "11011111" // /* MW 5 */
+ 2818 "11111111" // /* MW 4 */
+ 2819 "00111100" // /* MW 3 */
+ 2820 "11111010" // /* MW 2 */
+ 2821 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00000101" // /* MW 3 */
+ 2824 "00111000" // /* MW 2 */
+ 2825 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "11000111" // /* MW 3 */
+ 2828 "10111001" // /* MW 2 */
+ 2829 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */
+ 2831 "00000001" // /* MW 5 */
+ 2832 "01000000" // /* MW 4 */
+ 2833 "11100000" // /* MW 3 */
+ 2834 "00000111" // /* MW 2 */
+ 2835 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2843 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 41 first
+.delay_slot
+ 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "11111111" // /* MW 3 */
+ 2846 "11101101" // /* MW 2 */
+ 2847 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00001001" // /* MW 3 */
+ 2850 "00100010" // /* MW 2 */
+ 2851 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00100111" // /* MW 3 */
+ 2854 "01100010" // /* MW 2 */
+ 2855 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */
+ 2857 "00000001" // /* MW 5 */
+ 2858 "01000000" // /* MW 4 */
+ 2859 "10100000" // /* MW 3 */
+ 2860 "00000111" // /* MW 2 */
+ 2861 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2869 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.delay_slot
+ 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001101" // /* MW 3 */
+ 2872 "00001110" // /* MW 2 */
+ 2873 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00100111" // /* MW 3 */
+ 2876 "11000100" // /* MW 2 */
+ 2877 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */
+ 2879 "00000001" // /* MW 5 */
+ 2880 "01000000" // /* MW 4 */
+ 2881 "01010000" // /* MW 3 */
+ 2882 "00000111" // /* MW 2 */
+ 2883 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2893 "00000000" // /* MW 1 */
+ 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */
+ 2895 "00000000" // /* MW 5 */
+ 2896 "00000000" // /* MW 4 */
+ 2897 "11110000" // /* MW 3 */
+ 2898 "00000110" // /* MW 2 */
+ 2899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "00010001" // /* MW 3 */
+ 2902 "00110100" // /* MW 2 */
+ 2903 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2907 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2909 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2911 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "00010101" // /* MW 3 */
+ 2914 "00111010" // /* MW 2 */
+ 2915 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "00101010" // /* MW 3 */
+ 2918 "01110000" // /* MW 2 */
+ 2919 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */
+ 2921 "00000001" // /* MW 5 */
+ 2922 "01000000" // /* MW 4 */
+ 2923 "01010000" // /* MW 3 */
+ 2924 "00000110" // /* MW 2 */
+ 2925 "11000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2933 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 316 38
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2935 "00010001" // /* MW 3 */
+ 2936 "00110100" // /* MW 2 */
+ 2937 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2939 "00100111" // /* MW 3 */
+ 2940 "10100010" // /* MW 2 */
+ 2941 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */
+ 2943 "00000001" // /* MW 5 */
+ 2944 "01000000" // /* MW 4 */
+ 2945 "00010000" // /* MW 3 */
+ 2946 "00000110" // /* MW 2 */
+ 2947 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2953 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2957 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00101000" // /* MW 3 */
+ 2960 "01000100" // /* MW 2 */
+ 2961 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 2963 "00000001" // /* MW 5 */
+ 2964 "01000000" // /* MW 4 */
+ 2965 "11110000" // /* MW 3 */
+ 2966 "00000110" // /* MW 2 */
+ 2967 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2969 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2971 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2977 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 286 44 first
+.src_ref 2 "reduce_base_c8.h" 289 38
+.src_ref 2 "reduce_base_c8.h" 291 40
+.src_ref 2 "reduce_base_c8.h" 291 40
+ 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2979 "01011000" // /* MW 9 */
+ 2980 "11101100" // /* MW 8 */
+ 2981 "00000111" // /* MW 7 */
+ 2982 "00001000" // /* MW 6 */
+ 2983 "00100010" // /* MW 5 */
+ 2984 "00000000" // /* MW 4 */
+ 2985 "11100000" // /* MW 3 */
+ 2986 "11010110" // /* MW 2 */
+ 2987 "10000011" // /* MW 1 */
+ 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "10100000" // /* MW 3 */
+ 2990 "10011100" // /* MW 2 */
+ 2991 "00011111" // /* MW 1 */
+ 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2993 "00000000" // /* MW 1 */
+ 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2995 "00000000" // /* MW 1 */
+ 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2997 "00000000" // /* MW 1 */
+ 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2999 "00000000" // /* MW 1 */
+ 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 38 first
+ 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3003 "11110111" // /* MW 3 */
+ 3004 "00011100" // /* MW 2 */
+ 3005 "00000100" // /* MW 1 */
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+ 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3017 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 288 39 first
+ 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3019 "11110111" // /* MW 3 */
+ 3020 "00011110" // /* MW 2 */
+ 3021 "00000100" // /* MW 1 */
+ 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3023 "00000000" // /* MW 1 */
+ 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3025 "00000000" // /* MW 1 */
+ 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3027 "00000000" // /* MW 1 */
+ 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3029 "00000000" // /* MW 1 */
+ 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3031 "00000000" // /* MW 1 */
+ 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3033 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 289 38 first
+ 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3035 "01010111" // /* MW 3 */
+ 3036 "00011100" // /* MW 2 */
+ 3037 "00000100" // /* MW 1 */
+ 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3039 "00000000" // /* MW 1 */
+ 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3041 "00000000" // /* MW 1 */
+ 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3043 "00000000" // /* MW 1 */
+ 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3045 "00000000" // /* MW 1 */
+ 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3047 "00000000" // /* MW 1 */
+ 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3049 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 39 first
+ 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3051 "00110111" // /* MW 3 */
+ 3052 "00011100" // /* MW 2 */
+ 3053 "00000100" // /* MW 1 */
+ 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3055 "00000000" // /* MW 1 */
+ 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3057 "00000000" // /* MW 1 */
+ 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3059 "00000000" // /* MW 1 */
+ 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3061 "00000000" // /* MW 1 */
+ 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3063 "00000000" // /* MW 1 */
+ 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3065 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 291 40 first
+ 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3067 "01010111" // /* MW 3 */
+ 3068 "00001000" // /* MW 2 */
+ 3069 "00000100" // /* MW 1 */
+ 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3071 "00000000" // /* MW 1 */
+ 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3073 "00000000" // /* MW 1 */
+ 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3075 "00000000" // /* MW 5 */
+ 3076 "00000000" // /* MW 4 */
+ 3077 "11101000" // /* MW 3 */
+ 3078 "00000110" // /* MW 2 */
+ 3079 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3081 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3083 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3085 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 292 38 first
+.delay_slot
+ 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3087 "01010001" // /* MW 3 */
+ 3088 "00000110" // /* MW 2 */
+ 3089 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 38 first
+.delay_slot
+ 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3091 "00011100" // /* MW 13 */
+ 3092 "00000000" // /* MW 12 */
+ 3093 "00000000" // /* MW 11 */
+ 3094 "01010111" // /* MW 10 */
+ 3095 "00011010" // /* MW 9 */
+ 3096 "01000000" // /* MW 8 */
+ 3097 "00000000" // /* MW 7 */
+ 3098 "00000000" // /* MW 6 */
+ 3099 "10100011" // /* MW 5 */
+ 3100 "00101001" // /* MW 4 */
+ 3101 "11111000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+.src_ref 2 "reduce_base_c8.h" 274 44 first
+.src_ref 2 "reduce_base_c8.h" 275 40
+.src_ref 2 "reduce_base_c8.h" 275 40
+ 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00010000" // /* MW 8 */
+ 3107 "01001000" // /* MW 7 */
+ 3108 "10101000" // /* MW 6 */
+ 3109 "01100111" // /* MW 5 */
+ 3110 "00111110" // /* MW 4 */
+ 3111 "11100000" // /* MW 3 */
+ 3112 "10010010" // /* MW 2 */
+ 3113 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 40 first
+.src_ref 2 "reduce_base_c8.h" 279 40
+ 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01111000" // /* MW 9 */
+ 3116 "00001110" // /* MW 8 */
+ 3117 "11010000" // /* MW 7 */
+ 3118 "00110011" // /* MW 6 */
+ 3119 "00100010" // /* MW 5 */
+ 3120 "00001100" // /* MW 4 */
+ 3121 "10000000" // /* MW 3 */
+ 3122 "10000000" // /* MW 2 */
+ 3123 "11111101" // /* MW 1 */
+ 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3125 "00000000" // /* MW 1 */
+ 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3127 "00000000" // /* MW 1 */
+ 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3129 "00000000" // /* MW 1 */
+ 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3131 "00000000" // /* MW 1 */
+ 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38
+ 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3135 "01010111" // /* MW 3 */
+ 3136 "00011100" // /* MW 2 */
+ 3137 "00000100" // /* MW 1 */
+ 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3139 "00000000" // /* MW 1 */
+ 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3141 "00000000" // /* MW 1 */
+ 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3143 "00000000" // /* MW 1 */
+ 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3145 "00000000" // /* MW 1 */
+ 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3147 "00000000" // /* MW 1 */
+ 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 39 first
+ 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3151 "11110111" // /* MW 3 */
+ 3152 "00011110" // /* MW 2 */
+ 3153 "00000100" // /* MW 1 */
+ 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3155 "00000000" // /* MW 1 */
+ 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3157 "00000000" // /* MW 1 */
+ 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3159 "00000000" // /* MW 1 */
+ 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3161 "00000000" // /* MW 1 */
+ 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3163 "00000000" // /* MW 1 */
+ 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3165 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38 first
+.src_ref 2 "reduce_base_c8.h" 277 38 first
+ 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3167 "01010111" // /* MW 3 */
+ 3168 "00011100" // /* MW 2 */
+ 3169 "00000100" // /* MW 1 */
+ 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3171 "00000000" // /* MW 1 */
+ 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3173 "00000000" // /* MW 1 */
+ 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3175 "00000000" // /* MW 1 */
+ 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3177 "00000000" // /* MW 1 */
+ 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3179 "00000000" // /* MW 1 */
+ 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3181 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 278 39 first
+ 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3183 "10110111" // /* MW 3 */
+ 3184 "00011100" // /* MW 2 */
+ 3185 "00000100" // /* MW 1 */
+ 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3187 "00000000" // /* MW 1 */
+ 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3189 "00000000" // /* MW 1 */
+ 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3191 "00000000" // /* MW 1 */
+ 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3193 "00000000" // /* MW 1 */
+ 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3195 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3197 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3199 "00110111" // /* MW 3 */
+ 3200 "00001000" // /* MW 2 */
+ 3201 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3205 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3207 "00000000" // /* MW 5 */
+ 3208 "00000000" // /* MW 4 */
+ 3209 "11101000" // /* MW 3 */
+ 3210 "00000110" // /* MW 2 */
+ 3211 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3213 "01000001" // /* MW 3 */
+ 3214 "00000010" // /* MW 2 */
+ 3215 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3219 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 281 38 first
+.delay_slot
+ 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3221 "01110001" // /* MW 3 */
+ 3222 "00010100" // /* MW 2 */
+ 3223 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 38 first
+.delay_slot
+ 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3225 "01110000" // /* MW 7 */
+ 3226 "10100101" // /* MW 6 */
+ 3227 "00000001" // /* MW 5 */
+ 3228 "00000000" // /* MW 4 */
+ 3229 "00110000" // /* MW 3 */
+ 3230 "11000010" // /* MW 2 */
+ 3231 "10000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 302 76
+ 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3233 "00110010" // /* MW 5 */
+ 3234 "00010000" // /* MW 4 */
+ 3235 "00100000" // /* MW 3 */
+ 3236 "10001110" // /* MW 2 */
+ 3237 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3239 "01000111" // /* MW 3 */
+ 3240 "10001000" // /* MW 2 */
+ 3241 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */
+ 3243 "00000001" // /* MW 5 */
+ 3244 "01000000" // /* MW 4 */
+ 3245 "10101000" // /* MW 3 */
+ 3246 "00000110" // /* MW 2 */
+ 3247 "00100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 310 44
+.src_ref 2 "reduce_base_c8.h" 311 38
+.delay_slot
+ 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "01000001" // /* MW 3 */
+ 3250 "00000010" // /* MW 2 */
+ 3251 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3259 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3261 "00011101" // /* MW 3 */
+ 3262 "00000110" // /* MW 2 */
+ 3263 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3265 "00101000" // /* MW 3 */
+ 3266 "11000100" // /* MW 2 */
+ 3267 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 3269 "00000001" // /* MW 5 */
+ 3270 "01000000" // /* MW 4 */
+ 3271 "11110000" // /* MW 3 */
+ 3272 "00000110" // /* MW 2 */
+ 3273 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3275 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3283 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 310 44 first
+.src_ref 2 "reduce_base_c8.h" 312 41 first
+.src_ref 2 "reduce_base_c8.h" 315 40
+ 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3285 "01011000" // /* MW 9 */
+ 3286 "11101100" // /* MW 8 */
+ 3287 "00000111" // /* MW 7 */
+ 3288 "11111000" // /* MW 6 */
+ 3289 "00101111" // /* MW 5 */
+ 3290 "00100110" // /* MW 4 */
+ 3291 "11100000" // /* MW 3 */
+ 3292 "10000110" // /* MW 2 */
+ 3293 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38
+.src_ref 2 "reduce_base_c8.h" 317 97
+ 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3295 "00010000" // /* MW 9 */
+ 3296 "00000000" // /* MW 8 */
+ 3297 "01000000" // /* MW 7 */
+ 3298 "01000000" // /* MW 6 */
+ 3299 "00000000" // /* MW 5 */
+ 3300 "00000000" // /* MW 4 */
+ 3301 "00000000" // /* MW 3 */
+ 3302 "01000011" // /* MW 2 */
+ 3303 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40
+.src_ref 2 "reduce_base_c8.h" 317 97 first
+ 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3305 "00000001" // /* MW 5 */
+ 3306 "00100000" // /* MW 4 */
+ 3307 "10111100" // /* MW 3 */
+ 3308 "11000111" // /* MW 2 */
+ 3309 "11100000" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+ 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3317 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 311 38 first
+ 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3319 "00110111" // /* MW 3 */
+ 3320 "00011100" // /* MW 2 */
+ 3321 "00000100" // /* MW 1 */
+ 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3323 "00000000" // /* MW 1 */
+ 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3325 "00000000" // /* MW 1 */
+ 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3327 "00000000" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 39 first
+ 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3335 "01010111" // /* MW 3 */
+ 3336 "00011100" // /* MW 2 */
+ 3337 "00000100" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+ 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3341 "00000000" // /* MW 1 */
+ 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3343 "00000000" // /* MW 1 */
+ 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3345 "00000000" // /* MW 1 */
+ 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3347 "00000000" // /* MW 1 */
+ 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3349 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38 first
+ 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3351 "01000001" // /* MW 3 */
+ 3352 "00011100" // /* MW 2 */
+ 3353 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40 first
+ 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3355 "00010111" // /* MW 3 */
+ 3356 "00001011" // /* MW 2 */
+ 3357 "00000100" // /* MW 1 */
+ 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3359 "00000000" // /* MW 1 */
+ 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3361 "00000000" // /* MW 1 */
+ 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3363 "00000000" // /* MW 1 */
+ 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3365 "00000000" // /* MW 1 */
+ 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3367 "00000000" // /* MW 1 */
+ 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3369 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 316 38 first
+ 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3371 "01010001" // /* MW 3 */
+ 3372 "00000111" // /* MW 2 */
+ 3373 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 317 38 first
+ 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3375 "01110001" // /* MW 3 */
+ 3376 "00010100" // /* MW 2 */
+ 3377 "00001100" // /* MW 1 */
+ 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3379 "00000000" // /* MW 5 */
+ 3380 "00000000" // /* MW 4 */
+ 3381 "11101000" // /* MW 3 */
+ 3382 "00000110" // /* MW 2 */
+ 3383 "00000000" // /* MW 1 */
+.delay_slot
+ 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3385 "10100000" // /* MW 3 */
+ 3386 "10011111" // /* MW 2 */
+ 3387 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3395 "00011100" // /* MW 13 */
+ 3396 "00000000" // /* MW 12 */
+ 3397 "00000000" // /* MW 11 */
+ 3398 "01010111" // /* MW 10 */
+ 3399 "00011010" // /* MW 9 */
+ 3400 "01000000" // /* MW 8 */
+ 3401 "00000000" // /* MW 7 */
+ 3402 "00000000" // /* MW 6 */
+ 3403 "10110110" // /* MW 5 */
+ 3404 "00000010" // /* MW 4 */
+ 3405 "11110000" // /* MW 3 */
+ 3406 "00101100" // /* MW 2 */
+ 3407 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 301 40 first
+ 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3409 "01011000" // /* MW 9 */
+ 3410 "00010000" // /* MW 8 */
+ 3411 "01001000" // /* MW 7 */
+ 3412 "01110000" // /* MW 6 */
+ 3413 "00101011" // /* MW 5 */
+ 3414 "00000110" // /* MW 4 */
+ 3415 "11100000" // /* MW 3 */
+ 3416 "10000110" // /* MW 2 */
+ 3417 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 306 62
+ 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3419 "01111000" // /* MW 9 */
+ 3420 "00001110" // /* MW 8 */
+ 3421 "11010000" // /* MW 7 */
+ 3422 "10101000" // /* MW 6 */
+ 3423 "01000111" // /* MW 5 */
+ 3424 "00111110" // /* MW 4 */
+ 3425 "10000000" // /* MW 3 */
+ 3426 "10000000" // /* MW 2 */
+ 3427 "11111101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76 first
+ 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3429 "01001101" // /* MW 3 */
+ 3430 "11001000" // /* MW 2 */
+ 3431 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 41
+.src_ref 2 "reduce_base_c8.h" 306 62 first
+ 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3433 "11111111" // /* MW 5 */
+ 3434 "10100100" // /* MW 4 */
+ 3435 "11110001" // /* MW 3 */
+ 3436 "10001101" // /* MW 2 */
+ 3437 "11110111" // /* MW 1 */
+ 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3439 "00000000" // /* MW 1 */
+ 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3441 "00000000" // /* MW 1 */
+ 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3443 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 38 first
+ 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3445 "01110111" // /* MW 3 */
+ 3446 "00011111" // /* MW 2 */
+ 3447 "00000100" // /* MW 1 */
+ 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3449 "00000000" // /* MW 1 */
+ 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3451 "00000000" // /* MW 1 */
+ 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3453 "00000000" // /* MW 1 */
+ 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3455 "00000000" // /* MW 1 */
+ 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3457 "00000000" // /* MW 1 */
+ 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3459 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 39 first
+ 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3461 "10110111" // /* MW 3 */
+ 3462 "00011100" // /* MW 2 */
+ 3463 "00000100" // /* MW 1 */
+ 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3465 "00000000" // /* MW 1 */
+ 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3467 "00000000" // /* MW 1 */
+ 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3469 "00000000" // /* MW 1 */
+ 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3471 "00000000" // /* MW 1 */
+ 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3473 "00000000" // /* MW 1 */
+ 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3475 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 38 first
+ 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3477 "01010111" // /* MW 3 */
+ 3478 "00011100" // /* MW 2 */
+ 3479 "00000100" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+ 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3491 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 39 first
+ 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3493 "01110111" // /* MW 3 */
+ 3494 "00011100" // /* MW 2 */
+ 3495 "00000100" // /* MW 1 */
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+ 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3507 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 303 40 first
+ 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3509 "00110111" // /* MW 3 */
+ 3510 "00001000" // /* MW 2 */
+ 3511 "00000100" // /* MW 1 */
+ 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3513 "00000000" // /* MW 1 */
+ 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3515 "00000000" // /* MW 1 */
+ 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3517 "00000000" // /* MW 1 */
+ 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3519 "00000000" // /* MW 1 */
+ 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3521 "00000000" // /* MW 1 */
+ 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3523 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 304 38 first
+ 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3525 "00110001" // /* MW 3 */
+ 3526 "00000110" // /* MW 2 */
+ 3527 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 38 first
+ 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3529 "01110000" // /* MW 7 */
+ 3530 "10100101" // /* MW 6 */
+ 3531 "00000001" // /* MW 5 */
+ 3532 "00000000" // /* MW 4 */
+ 3533 "00110000" // /* MW 3 */
+ 3534 "11010010" // /* MW 2 */
+ 3535 "10000010" // /* MW 1 */
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3537 "01000000" // /* MW 3 */
+ 3538 "10000000" // /* MW 2 */
+ 3539 "00011000" // /* MW 1 */
+ 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3541 "10000001" // /* MW 11 */
+ 3542 "10101101" // /* MW 10 */
+ 3543 "00000000" // /* MW 9 */
+ 3544 "00000000" // /* MW 8 */
+ 3545 "00000000" // /* MW 7 */
+ 3546 "00000000" // /* MW 6 */
+ 3547 "00100000" // /* MW 5 */
+ 3548 "00000000" // /* MW 4 */
+ 3549 "11100000" // /* MW 3 */
+ 3550 "01111010" // /* MW 2 */
+ 3551 "01100000" // /* MW 1 */
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 326 79 first
+.src_ref 2 "reduce_base_c8.h" 329 51
+ 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3553 "00011110" // /* MW 5 */
+ 3554 "11000000" // /* MW 4 */
+ 3555 "10000000" // /* MW 3 */
+ 3556 "00001000" // /* MW 2 */
+ 3557 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 26
+.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first
+ 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3559 "01011000" // /* MW 9 */
+ 3560 "11100010" // /* MW 8 */
+ 3561 "00000111" // /* MW 7 */
+ 3562 "00001000" // /* MW 6 */
+ 3563 "00000010" // /* MW 5 */
+ 3564 "00000000" // /* MW 4 */
+ 3565 "11010000" // /* MW 3 */
+ 3566 "10001010" // /* MW 2 */
+ 3567 "01000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3569 "10011001" // /* MW 5 */
+ 3570 "00000000" // /* MW 4 */
+ 3571 "01010010" // /* MW 3 */
+ 3572 "10001110" // /* MW 2 */
+ 3573 "01000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first
+ 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3575 "00110110" // /* MW 3 */
+ 3576 "00010100" // /* MW 2 */
+ 3577 "00000010" // /* MW 1 */
+ 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3579 "00000000" // /* MW 1 */
+ 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3581 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 326 28 first
+ 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3583 "11110111" // /* MW 3 */
+ 3584 "00101111" // /* MW 2 */
+ 3585 "00000000" // /* MW 1 */
+ 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3587 "00000000" // /* MW 1 */
+ 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3589 "00000000" // /* MW 1 */
+ 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3591 "00000000" // /* MW 1 */
+ 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3593 "00000000" // /* MW 1 */
+ 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3595 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3597 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3599 "00010111" // /* MW 3 */
+ 3600 "01011111" // /* MW 2 */
+ 3601 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3603 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3605 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3607 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3609 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31
+.src_ref 2 "reduce_base_c8.h" 328 23
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00000001" // /* MW 3 */
+ 3612 "00110000" // /* MW 2 */
+ 3613 "00010000" // /* MW 1 */
+ 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3615 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 328 23 first
+ 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3617 "00010111" // /* MW 3 */
+ 3618 "11001111" // /* MW 2 */
+ 3619 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 51 first
+ 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3621 "10011010" // /* MW 3 */
+ 3622 "01001000" // /* MW 2 */
+ 3623 "00000000" // /* MW 1 */
+ 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3625 "00000000" // /* MW 1 */
+ 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3627 "00000000" // /* MW 1 */
+ 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3629 "00000000" // /* MW 1 */
+ 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 28
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3635 "00010111" // /* MW 3 */
+ 3636 "11111100" // /* MW 2 */
+ 3637 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 28
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3639 "11111111" // /* MW 5 */
+ 3640 "10100100" // /* MW 4 */
+ 3641 "10110010" // /* MW 3 */
+ 3642 "00110101" // /* MW 2 */
+ 3643 "00100001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3645 "01000001" // /* MW 3 */
+ 3646 "00000000" // /* MW 2 */
+ 3647 "00010000" // /* MW 1 */
+ 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3649 "00000000" // /* MW 1 */
+ 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3651 "00000000" // /* MW 1 */
+ 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3653 "00000000" // /* MW 1 */
+ 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3655 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 330 26 first
+ 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3657 "10110111" // /* MW 3 */
+ 3658 "00001000" // /* MW 2 */
+ 3659 "00000000" // /* MW 1 */
+ 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3661 "00000000" // /* MW 1 */
+ 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3663 "00000000" // /* MW 1 */
+ 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3665 "00000000" // /* MW 1 */
+ 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3667 "00000000" // /* MW 1 */
+ 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3669 "00000000" // /* MW 1 */
+ 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3671 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24 first
+ 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3673 "01110111" // /* MW 3 */
+ 3674 "00101010" // /* MW 2 */
+ 3675 "00000000" // /* MW 1 */
+ 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3677 "00000000" // /* MW 1 */
+ 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3679 "00000000" // /* MW 1 */
+ 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3681 "00000000" // /* MW 1 */
+ 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3683 "00000000" // /* MW 1 */
+ 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3685 "00000000" // /* MW 1 */
+ 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3687 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first
+ 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3689 "01000111" // /* MW 3 */
+ 3690 "11101100" // /* MW 2 */
+ 3691 "00000000" // /* MW 1 */
+ 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3693 "00000000" // /* MW 1 */
+ 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3695 "00000000" // /* MW 1 */
+ 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3697 "00000000" // /* MW 1 */
+ 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3699 "00000000" // /* MW 1 */
+ 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3701 "00000000" // /* MW 1 */
+ 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3703 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3705 "01110111" // /* MW 3 */
+ 3706 "00000100" // /* MW 2 */
+ 3707 "00000000" // /* MW 1 */
+ 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3709 "00000000" // /* MW 1 */
+ 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3711 "00000000" // /* MW 1 */
+ 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3713 "00000000" // /* MW 1 */
+ 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3715 "00000000" // /* MW 1 */
+ 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3717 "00000000" // /* MW 1 */
+ 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3719 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first
+ 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3721 "00100111" // /* MW 3 */
+ 3722 "11100100" // /* MW 2 */
+ 3723 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4 first
+ 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 3725 "00000000" // /* MW 3 */
+ 3726 "00101000" // /* MW 2 */
+ 3727 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4
+.delay_slot
+ 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3729 "00000001" // /* MW 5 */
+ 3730 "00000000" // /* MW 4 */
+ 3731 "00000000" // /* MW 3 */
+ 3732 "11111000" // /* MW 2 */
+ 3733 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3735 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3737 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3739 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3741 "01100111" // /* MW 3 */
+ 3742 "00000001" // /* MW 2 */
+ 3743 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+.src_ref 2 "reduce_base_c8.h" 262 44 first
+.src_ref 2 "reduce_base_c8.h" 263 77
+ 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3745 "00010000" // /* MW 9 */
+ 3746 "11110100" // /* MW 8 */
+ 3747 "10101111" // /* MW 7 */
+ 3748 "00111100" // /* MW 6 */
+ 3749 "00000000" // /* MW 5 */
+ 3750 "00000000" // /* MW 4 */
+ 3751 "11100000" // /* MW 3 */
+ 3752 "11010110" // /* MW 2 */
+ 3753 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.src_ref 2 "reduce_base_c8.h" 263 77 first
+.src_ref 2 "reduce_base_c8.h" 267 40
+ 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3755 "01011000" // /* MW 9 */
+ 3756 "11101100" // /* MW 8 */
+ 3757 "00000111" // /* MW 7 */
+ 3758 "00000100" // /* MW 6 */
+ 3759 "01111101" // /* MW 5 */
+ 3760 "00001010" // /* MW 4 */
+ 3761 "00100000" // /* MW 3 */
+ 3762 "10001010" // /* MW 2 */
+ 3763 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3765 "10010000" // /* MW 9 */
+ 3766 "11111111" // /* MW 8 */
+ 3767 "11001111" // /* MW 7 */
+ 3768 "00111100" // /* MW 6 */
+ 3769 "00000000" // /* MW 5 */
+ 3770 "00000000" // /* MW 4 */
+ 3771 "00000000" // /* MW 3 */
+ 3772 "10011010" // /* MW 2 */
+ 3773 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118 first
+ 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3775 "01100000" // /* MW 3 */
+ 3776 "11100010" // /* MW 2 */
+ 3777 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 98
+.src_ref 2 "reduce_base_c8.h" 267 116 first
+ 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3779 "01000110" // /* MW 3 */
+ 3780 "01111010" // /* MW 2 */
+ 3781 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 60 first
+.src_ref 2 "reduce_base_c8.h" 265 98 first
+ 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3783 "01001110" // /* MW 3 */
+ 3784 "01101010" // /* MW 2 */
+ 3785 "00010100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3787 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 38 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3789 "01010111" // /* MW 3 */
+ 3790 "00011100" // /* MW 2 */
+ 3791 "00000100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3793 "00101111" // /* MW 3 */
+ 3794 "11000100" // /* MW 2 */
+ 3795 "00010001" // /* MW 1 */
+ 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3797 "00000000" // /* MW 1 */
+ 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3799 "00000000" // /* MW 1 */
+ 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3801 "00000000" // /* MW 1 */
+ 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3803 "00000000" // /* MW 1 */
+ 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3805 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 264 39 first
+ 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3807 "11010111" // /* MW 3 */
+ 3808 "00011110" // /* MW 2 */
+ 3809 "00000100" // /* MW 1 */
+ 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3811 "00000000" // /* MW 1 */
+ 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3813 "00000000" // /* MW 1 */
+ 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3815 "00000000" // /* MW 1 */
+ 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3817 "00000000" // /* MW 1 */
+ 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3819 "00000000" // /* MW 1 */
+ 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3821 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 38 first
+ 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3823 "10110111" // /* MW 3 */
+ 3824 "00011110" // /* MW 2 */
+ 3825 "00000100" // /* MW 1 */
+ 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3827 "00000000" // /* MW 1 */
+ 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3829 "00000000" // /* MW 1 */
+ 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3831 "00000000" // /* MW 1 */
+ 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3833 "00000000" // /* MW 1 */
+ 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3835 "00000000" // /* MW 1 */
+ 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3837 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 266 39 first
+ 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3839 "00110111" // /* MW 3 */
+ 3840 "00011100" // /* MW 2 */
+ 3841 "00000100" // /* MW 1 */
+ 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3843 "00000000" // /* MW 1 */
+ 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3845 "00000000" // /* MW 1 */
+ 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3847 "00000000" // /* MW 1 */
+ 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3849 "00000000" // /* MW 1 */
+ 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3851 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3853 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 40 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3855 "01010111" // /* MW 3 */
+ 3856 "00001000" // /* MW 2 */
+ 3857 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3859 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3861 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3863 "00000000" // /* MW 5 */
+ 3864 "00000000" // /* MW 4 */
+ 3865 "11101000" // /* MW 3 */
+ 3866 "00000110" // /* MW 2 */
+ 3867 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3869 "01000001" // /* MW 3 */
+ 3870 "00001010" // /* MW 2 */
+ 3871 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3873 "11010001" // /* MW 3 */
+ 3874 "01000101" // /* MW 2 */
+ 3875 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 270 64
+.delay_slot
+ 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3877 "00011100" // /* MW 3 */
+ 3878 "10100001" // /* MW 2 */
+ 3879 "00011001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 268 38 first
+.delay_slot
+ 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3881 "01110000" // /* MW 7 */
+ 3882 "10100101" // /* MW 6 */
+ 3883 "00000001" // /* MW 5 */
+ 3884 "00000000" // /* MW 4 */
+ 3885 "00110000" // /* MW 3 */
+ 3886 "10001110" // /* MW 2 */
+ 3887 "10000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 269 38 first
+.src_ref 2 "reduce_base_c8.h" 270 64 first
+.delay_slot
+ 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3889 "00000000" // /* MW 15 */
+ 3890 "00000000" // /* MW 14 */
+ 3891 "01111000" // /* MW 13 */
+ 3892 "10100101" // /* MW 12 */
+ 3893 "00000001" // /* MW 11 */
+ 3894 "01111100" // /* MW 10 */
+ 3895 "11100011" // /* MW 9 */
+ 3896 "10111101" // /* MW 8 */
+ 3897 "00010001" // /* MW 7 */
+ 3898 "00010110" // /* MW 6 */
+ 3899 "00100100" // /* MW 5 */
+ 3900 "00000000" // /* MW 4 */
+ 3901 "11110000" // /* MW 3 */
+ 3902 "00101100" // /* MW 2 */
+ 3903 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+.src_ref 2 "reduce_base_c8.h" 250 44
+.src_ref 2 "reduce_base_c8.h" 250 44 first
+.src_ref 2 "reduce_base_c8.h" 255 40
+ 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "01011000" // /* MW 9 */
+ 3906 "11101100" // /* MW 8 */
+ 3907 "00000111" // /* MW 7 */
+ 3908 "00001000" // /* MW 6 */
+ 3909 "01000010" // /* MW 5 */
+ 3910 "00000000" // /* MW 4 */
+ 3911 "11100000" // /* MW 3 */
+ 3912 "10010010" // /* MW 2 */
+ 3913 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113 first
+ 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "01011000" // /* MW 9 */
+ 3916 "00001000" // /* MW 8 */
+ 3917 "01001000" // /* MW 7 */
+ 3918 "01110000" // /* MW 6 */
+ 3919 "00101101" // /* MW 5 */
+ 3920 "00000110" // /* MW 4 */
+ 3921 "00100000" // /* MW 3 */
+ 3922 "10000110" // /* MW 2 */
+ 3923 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3925 "00010001" // /* MW 5 */
+ 3926 "00100000" // /* MW 4 */
+ 3927 "00101101" // /* MW 3 */
+ 3928 "11001000" // /* MW 2 */
+ 3929 "00000000" // /* MW 1 */
+ 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3931 "00000000" // /* MW 1 */
+ 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3933 "00000000" // /* MW 1 */
+ 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3935 "00000000" // /* MW 1 */
+ 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3937 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 251 38 first
+ 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3939 "01110111" // /* MW 3 */
+ 3940 "00011111" // /* MW 2 */
+ 3941 "00000100" // /* MW 1 */
+ 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3943 "00000000" // /* MW 1 */
+ 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3945 "00000000" // /* MW 1 */
+ 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3947 "00000000" // /* MW 1 */
+ 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3949 "00000000" // /* MW 1 */
+ 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3951 "00000000" // /* MW 1 */
+ 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 252 39 first
+ 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3955 "10110111" // /* MW 3 */
+ 3956 "00011100" // /* MW 2 */
+ 3957 "00000100" // /* MW 1 */
+ 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3959 "00000000" // /* MW 1 */
+ 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3961 "00000000" // /* MW 1 */
+ 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3963 "00000000" // /* MW 1 */
+ 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3965 "00000000" // /* MW 1 */
+ 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3967 "00000000" // /* MW 1 */
+ 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3969 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 253 38 first
+ 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3971 "01110111" // /* MW 3 */
+ 3972 "00011111" // /* MW 2 */
+ 3973 "00000100" // /* MW 1 */
+ 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3975 "00000000" // /* MW 1 */
+ 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3977 "00000000" // /* MW 1 */
+ 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3979 "00000000" // /* MW 1 */
+ 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3981 "00000000" // /* MW 1 */
+ 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3983 "00000000" // /* MW 1 */
+ 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3985 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 254 39 first
+ 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3987 "11010111" // /* MW 3 */
+ 3988 "00011110" // /* MW 2 */
+ 3989 "00000100" // /* MW 1 */
+ 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3991 "00000000" // /* MW 1 */
+ 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3993 "00000000" // /* MW 1 */
+ 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3995 "00000000" // /* MW 1 */
+ 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3997 "00000000" // /* MW 1 */
+ 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3999 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 40 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4003 "01110111" // /* MW 3 */
+ 4004 "00001000" // /* MW 2 */
+ 4005 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4007 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4009 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4011 "00000000" // /* MW 5 */
+ 4012 "00000000" // /* MW 4 */
+ 4013 "11101000" // /* MW 3 */
+ 4014 "00000110" // /* MW 2 */
+ 4015 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4017 "00100110" // /* MW 3 */
+ 4018 "01000110" // /* MW 2 */
+ 4019 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4023 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 256 38 first
+.delay_slot
+ 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4025 "11010001" // /* MW 3 */
+ 4026 "00000100" // /* MW 2 */
+ 4027 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 257 38 first
+.delay_slot
+ 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4029 "01010001" // /* MW 3 */
+ 4030 "00010110" // /* MW 2 */
+ 4031 "00001100" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+.src_ref 2 "reduce_base_c8.h" 238 44 first
+ 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4033 "10110111" // /* MW 3 */
+ 4034 "00011110" // /* MW 2 */
+ 4035 "00000100" // /* MW 1 */
+ 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4037 "00000000" // /* MW 1 */
+ 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4039 "00000000" // /* MW 1 */
+ 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4041 "00000000" // /* MW 1 */
+ 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4043 "00000000" // /* MW 1 */
+ 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4045 "00000000" // /* MW 1 */
+ 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4047 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 239 38 first
+ 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4049 "11110111" // /* MW 3 */
+ 4050 "00011100" // /* MW 2 */
+ 4051 "00000100" // /* MW 1 */
+ 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4053 "00000000" // /* MW 1 */
+ 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4055 "00000000" // /* MW 1 */
+ 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4057 "00000000" // /* MW 1 */
+ 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4059 "00000000" // /* MW 1 */
+ 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4061 "00000000" // /* MW 1 */
+ 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4063 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 240 39 first
+ 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4065 "11110111" // /* MW 3 */
+ 4066 "00011110" // /* MW 2 */
+ 4067 "00000100" // /* MW 1 */
+ 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4069 "00000000" // /* MW 1 */
+ 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4071 "00000000" // /* MW 1 */
+ 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4073 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+ 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4075 "01110001" // /* MW 3 */
+ 4076 "11111100" // /* MW 2 */
+ 4077 "00000111" // /* MW 1 */
+ 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4079 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4081 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 38 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4083 "00110111" // /* MW 3 */
+ 4084 "00011100" // /* MW 2 */
+ 4085 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4087 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4089 "11000000" // /* MW 5 */
+ 4090 "10111111" // /* MW 4 */
+ 4091 "11110000" // /* MW 3 */
+ 4092 "00000000" // /* MW 2 */
+ 4093 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4095 "10100000" // /* MW 3 */
+ 4096 "01000101" // /* MW 2 */
+ 4097 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4099 "01000001" // /* MW 5 */
+ 4100 "10100000" // /* MW 4 */
+ 4101 "11000000" // /* MW 3 */
+ 4102 "01000100" // /* MW 2 */
+ 4103 "00011000" // /* MW 1 */
+ 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4105 "00000000" // /* MW 1 */
+ 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4107 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 39 first
+ 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4109 "11010111" // /* MW 3 */
+ 4110 "00011110" // /* MW 2 */
+ 4111 "00000100" // /* MW 1 */
+ 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4113 "00000000" // /* MW 1 */
+ 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4115 "00000000" // /* MW 1 */
+ 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4117 "00000000" // /* MW 1 */
+ 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4119 "00000000" // /* MW 1 */
+ 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4121 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4123 "11011000" // /* MW 3 */
+ 4124 "00001111" // /* MW 2 */
+ 4125 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40 first
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4127 "10110111" // /* MW 3 */
+ 4128 "00001000" // /* MW 2 */
+ 4129 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4131 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4133 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4135 "00000000" // /* MW 5 */
+ 4136 "00000000" // /* MW 4 */
+ 4137 "11101000" // /* MW 3 */
+ 4138 "00000110" // /* MW 2 */
+ 4139 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 91
+.src_ref 2 "reduce_base_c8.h" 243 91
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4141 "01000001" // /* MW 5 */
+ 4142 "10100000" // /* MW 4 */
+ 4143 "11000010" // /* MW 3 */
+ 4144 "01001001" // /* MW 2 */
+ 4145 "10110001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 244 38 first
+.delay_slot
+ 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4151 "10010001" // /* MW 3 */
+ 4152 "00000110" // /* MW 2 */
+ 4153 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 245 38 first
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4155 "01111001" // /* MW 9 */
+ 4156 "10001110" // /* MW 8 */
+ 4157 "11010000" // /* MW 7 */
+ 4158 "10001011" // /* MW 6 */
+ 4159 "10100000" // /* MW 5 */
+ 4160 "00000001" // /* MW 4 */
+ 4161 "00110000" // /* MW 3 */
+ 4162 "11000110" // /* MW 2 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+ 4163 "10000010" // /* MW 1 */
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.src_ref 3 "pad_3d.h" 266 first
+.src_ref 3 "pad_3d.h" 465 37 first
+.src_ref 3 "pad_3d.h" 468 21 first
+.src_ref 3 "pad_3d.h" 471 29
+.src_ref 3 "pad_3d.h" 479 21
+.function_start
+ 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4177 "01011000" // /* MW 9 */
+ 4178 "11101000" // /* MW 8 */
+ 4179 "10000111" // /* MW 7 */
+ 4180 "11001000" // /* MW 6 */
+ 4181 "01000111" // /* MW 5 */
+ 4182 "00111110" // /* MW 4 */
+ 4183 "11010000" // /* MW 3 */
+ 4184 "10000010" // /* MW 2 */
+ 4185 "01000010" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 469 21 first
+.src_ref 3 "pad_3d.h" 478 21
+.src_ref 3 "pad_3d.h" 499 52
+.src_ref 3 "pad_3d.h" 511 25
+ 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4187 "01011000" // /* MW 9 */
+ 4188 "00000110" // /* MW 8 */
+ 4189 "00001000" // /* MW 7 */
+ 4190 "10101010" // /* MW 6 */
+ 4191 "00100111" // /* MW 5 */
+ 4192 "00111110" // /* MW 4 */
+ 4193 "11010000" // /* MW 3 */
+ 4194 "10000110" // /* MW 2 */
+ 4195 "01000101" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 470 21 first
+.src_ref 3 "pad_3d.h" 486 26
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 26
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22
+ 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4197 "01111000" // /* MW 9 */
+ 4198 "01100000" // /* MW 8 */
+ 4199 "01101000" // /* MW 7 */
+ 4200 "00001000" // /* MW 6 */
+ 4201 "10000000" // /* MW 5 */
+ 4202 "00000001" // /* MW 4 */
+ 4203 "11010000" // /* MW 3 */
+ 4204 "10010110" // /* MW 2 */
+ 4205 "01001111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 471 29 first
+ 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4207 "01010010" // /* MW 3 */
+ 4208 "00101010" // /* MW 2 */
+ 4209 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 472 25 first
+ 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4211 "11010110" // /* MW 3 */
+ 4212 "00011100" // /* MW 2 */
+ 4213 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 473 26 first
+ 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4215 "11110110" // /* MW 3 */
+ 4216 "00101100" // /* MW 2 */
+ 4217 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 475 24 first
+ 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4219 "00110110" // /* MW 3 */
+ 4220 "00000110" // /* MW 2 */
+ 4221 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 479 21 first
+ 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4223 "01001110" // /* MW 3 */
+ 4224 "00100110" // /* MW 2 */
+ 4225 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 477 23 first
+ 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4227 "10010110" // /* MW 3 */
+ 4228 "00100100" // /* MW 2 */
+ 4229 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 478 21 first
+ 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4231 "00101110" // /* MW 3 */
+ 4232 "01101000" // /* MW 2 */
+ 4233 "00010001" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 56 25 first
+ 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4235 "01110010" // /* MW 3 */
+ 4236 "01001001" // /* MW 2 */
+ 4237 "00011000" // /* MW 1 */
+ 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4239 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 45 first
+ 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4241 "01001111" // /* MW 3 */
+ 4242 "11100101" // /* MW 2 */
+ 4243 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 34
+ 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4245 "00010001" // /* MW 3 */
+ 4246 "01100111" // /* MW 2 */
+ 4247 "00010000" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 998 25 first
+ 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4249 "00101111" // /* MW 3 */
+ 4250 "11100111" // /* MW 2 */
+ 4251 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 43 first
+ 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4253 "00101111" // /* MW 3 */
+ 4254 "01100011" // /* MW 2 */
+ 4255 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13 first
+ 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4257 "00001101" // /* MW 3 */
+ 4258 "11100001" // /* MW 2 */
+ 4259 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 486 26 first
+ 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4261 "10000010" // /* MW 5 */
+ 4262 "11000011" // /* MW 4 */
+ 4263 "00110100" // /* MW 3 */
+ 4264 "00100011" // /* MW 2 */
+ 4265 "11000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */
+ 4267 "00000001" // /* MW 5 */
+ 4268 "01000000" // /* MW 4 */
+ 4269 "10100000" // /* MW 3 */
+ 4270 "00001000" // /* MW 2 */
+ 4271 "10000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 487 22
+.delay_slot
+ 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "10010010" // /* MW 3 */
+ 4274 "00000000" // /* MW 2 */
+ 4275 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4283 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4285 "01100000" // /* MW 5 */
+ 4286 "11100010" // /* MW 4 */
+ 4287 "00010001" // /* MW 3 */
+ 4288 "00000000" // /* MW 2 */
+ 4289 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4291 "01100000" // /* MW 5 */
+ 4292 "11100010" // /* MW 4 */
+ 4293 "00010110" // /* MW 3 */
+ 4294 "00000000" // /* MW 2 */
+ 4295 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4297 "01110000" // /* MW 7 */
+ 4298 "01010000" // /* MW 6 */
+ 4299 "10111100" // /* MW 5 */
+ 4300 "00000010" // /* MW 4 */
+ 4301 "01100000" // /* MW 3 */
+ 4302 "00101011" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+ 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "10100101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00100000" // /* MW 5 */
+ 4316 "00000000" // /* MW 4 */
+ 4317 "11110000" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "10100101" // /* MW 12 */
+ 4325 "00000001" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+ 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00100000" // /* MW 5 */
+ 4348 "00000000" // /* MW 4 */
+ 4349 "11110000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+ 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "01011011" // /* MW 7 */
+ 4362 "00000001" // /* MW 6 */
+ 4363 "00100000" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+ 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "10100101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "01011011" // /* MW 7 */
+ 4378 "00000001" // /* MW 6 */
+ 4379 "00100000" // /* MW 5 */
+ 4380 "00000000" // /* MW 4 */
+ 4381 "11110000" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+ 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "10100101" // /* MW 12 */
+ 4389 "00000001" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "01011011" // /* MW 7 */
+ 4394 "00000001" // /* MW 6 */
+ 4395 "00100000" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.src_ref 3 "pad_3d.h" 487 22 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4401 "00000000" // /* MW 15 */
+ 4402 "00000000" // /* MW 14 */
+ 4403 "01111000" // /* MW 13 */
+ 4404 "10100101" // /* MW 12 */
+ 4405 "00000001" // /* MW 11 */
+ 4406 "00000000" // /* MW 10 */
+ 4407 "00000000" // /* MW 9 */
+ 4408 "10000000" // /* MW 8 */
+ 4409 "00000110" // /* MW 7 */
+ 4410 "00011100" // /* MW 6 */
+ 4411 "00100010" // /* MW 5 */
+ 4412 "00000000" // /* MW 4 */
+ 4413 "11110000" // /* MW 3 */
+ 4414 "00101100" // /* MW 2 */
+ 4415 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.src_ref 3 "pad_3d.h" 495 21
+.src_ref 3 "pad_3d.h" 495 40 first
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 38 first
+.loop_nesting 0
+ 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4417 "10101000" // /* MW 9 */
+ 4418 "11001100" // /* MW 8 */
+ 4419 "00101001" // /* MW 7 */
+ 4420 "11111110" // /* MW 6 */
+ 4421 "00000000" // /* MW 5 */
+ 4422 "00001011" // /* MW 4 */
+ 4423 "00000000" // /* MW 3 */
+ 4424 "10000110" // /* MW 2 */
+ 4425 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 40
+.src_ref 3 "pad_3d.h" 496 29 first
+ 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4427 "11111111" // /* MW 5 */
+ 4428 "10000111" // /* MW 4 */
+ 4429 "00110010" // /* MW 3 */
+ 4430 "01100010" // /* MW 2 */
+ 4431 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 21 first
+ 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4433 "01101101" // /* MW 3 */
+ 4434 "01100010" // /* MW 2 */
+ 4435 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 58
+.src_ref 3 "pad_3d.h" 498 23 first
+ 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4437 "00010000" // /* MW 5 */
+ 4438 "00010001" // /* MW 4 */
+ 4439 "00110010" // /* MW 3 */
+ 4440 "01001110" // /* MW 2 */
+ 4441 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 45 first
+ 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4443 "00001111" // /* MW 3 */
+ 4444 "11100001" // /* MW 2 */
+ 4445 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10 first
+ 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4447 "01101101" // /* MW 3 */
+ 4448 "01001100" // /* MW 2 */
+ 4449 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 52 first
+ 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4451 "00110010" // /* MW 5 */
+ 4452 "11000011" // /* MW 4 */
+ 4453 "11010100" // /* MW 3 */
+ 4454 "10000101" // /* MW 2 */
+ 4455 "10000001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 26
+ 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4457 "01101001" // /* MW 3 */
+ 4458 "00001110" // /* MW 2 */
+ 4459 "00010110" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */
+ 4461 "00000001" // /* MW 5 */
+ 4462 "01000000" // /* MW 4 */
+ 4463 "00001000" // /* MW 3 */
+ 4464 "00001001" // /* MW 2 */
+ 4465 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4475 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4477 "00010000" // /* MW 9 */
+ 4478 "00000000" // /* MW 8 */
+ 4479 "01111001" // /* MW 7 */
+ 4480 "00000100" // /* MW 6 */
+ 4481 "00000000" // /* MW 5 */
+ 4482 "00000000" // /* MW 4 */
+ 4483 "10000000" // /* MW 3 */
+ 4484 "00000111" // /* MW 2 */
+ 4485 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4487 "00010000" // /* MW 9 */
+ 4488 "00000000" // /* MW 8 */
+ 4489 "10111001" // /* MW 7 */
+ 4490 "00000101" // /* MW 6 */
+ 4491 "00000000" // /* MW 5 */
+ 4492 "00000000" // /* MW 4 */
+ 4493 "10000000" // /* MW 3 */
+ 4494 "00000110" // /* MW 2 */
+ 4495 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4497 "00000000" // /* MW 15 */
+ 4498 "00000000" // /* MW 14 */
+ 4499 "01111000" // /* MW 13 */
+ 4500 "10010000" // /* MW 12 */
+ 4501 "10111001" // /* MW 11 */
+ 4502 "00000010" // /* MW 10 */
+ 4503 "00000000" // /* MW 9 */
+ 4504 "00000000" // /* MW 8 */
+ 4505 "01011011" // /* MW 7 */
+ 4506 "00000001" // /* MW 6 */
+ 4507 "00100000" // /* MW 5 */
+ 4508 "00000000" // /* MW 4 */
+ 4509 "11110000" // /* MW 3 */
+ 4510 "00101100" // /* MW 2 */
+ 4511 "00000000" // /* MW 1 */
+ 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4513 "00000000" // /* MW 15 */
+ 4514 "00000000" // /* MW 14 */
+ 4515 "01111000" // /* MW 13 */
+ 4516 "10100101" // /* MW 12 */
+ 4517 "00000001" // /* MW 11 */
+ 4518 "00000000" // /* MW 10 */
+ 4519 "00000000" // /* MW 9 */
+ 4520 "00000000" // /* MW 8 */
+ 4521 "01011011" // /* MW 7 */
+ 4522 "00000001" // /* MW 6 */
+ 4523 "00100000" // /* MW 5 */
+ 4524 "00000000" // /* MW 4 */
+ 4525 "11110000" // /* MW 3 */
+ 4526 "00101100" // /* MW 2 */
+ 4527 "00000000" // /* MW 1 */
+ 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4529 "00000000" // /* MW 15 */
+ 4530 "00000000" // /* MW 14 */
+ 4531 "01111000" // /* MW 13 */
+ 4532 "10100101" // /* MW 12 */
+ 4533 "00000001" // /* MW 11 */
+ 4534 "00000000" // /* MW 10 */
+ 4535 "00000000" // /* MW 9 */
+ 4536 "00000000" // /* MW 8 */
+ 4537 "01011011" // /* MW 7 */
+ 4538 "00000001" // /* MW 6 */
+ 4539 "00100000" // /* MW 5 */
+ 4540 "00000000" // /* MW 4 */
+ 4541 "11110000" // /* MW 3 */
+ 4542 "00101100" // /* MW 2 */
+ 4543 "00000000" // /* MW 1 */
+ 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4545 "00000000" // /* MW 15 */
+ 4546 "00000000" // /* MW 14 */
+ 4547 "01111000" // /* MW 13 */
+ 4548 "10100101" // /* MW 12 */
+ 4549 "00000001" // /* MW 11 */
+ 4550 "00000000" // /* MW 10 */
+ 4551 "00000000" // /* MW 9 */
+ 4552 "00000000" // /* MW 8 */
+ 4553 "01011011" // /* MW 7 */
+ 4554 "00000001" // /* MW 6 */
+ 4555 "00100000" // /* MW 5 */
+ 4556 "00000000" // /* MW 4 */
+ 4557 "11110000" // /* MW 3 */
+ 4558 "00101100" // /* MW 2 */
+ 4559 "00000000" // /* MW 1 */
+ 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4561 "00000000" // /* MW 15 */
+ 4562 "00000000" // /* MW 14 */
+ 4563 "01111000" // /* MW 13 */
+ 4564 "10100101" // /* MW 12 */
+ 4565 "00000001" // /* MW 11 */
+ 4566 "00000000" // /* MW 10 */
+ 4567 "00000000" // /* MW 9 */
+ 4568 "00000000" // /* MW 8 */
+ 4569 "01011011" // /* MW 7 */
+ 4570 "00000001" // /* MW 6 */
+ 4571 "00100000" // /* MW 5 */
+ 4572 "00000000" // /* MW 4 */
+ 4573 "11110000" // /* MW 3 */
+ 4574 "00101100" // /* MW 2 */
+ 4575 "00000000" // /* MW 1 */
+ 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4577 "00000000" // /* MW 15 */
+ 4578 "00000000" // /* MW 14 */
+ 4579 "01111000" // /* MW 13 */
+ 4580 "10100101" // /* MW 12 */
+ 4581 "00000001" // /* MW 11 */
+ 4582 "00000000" // /* MW 10 */
+ 4583 "00000000" // /* MW 9 */
+ 4584 "00000000" // /* MW 8 */
+ 4585 "01011011" // /* MW 7 */
+ 4586 "00000001" // /* MW 6 */
+ 4587 "00100000" // /* MW 5 */
+ 4588 "00000000" // /* MW 4 */
+ 4589 "11110000" // /* MW 3 */
+ 4590 "00101100" // /* MW 2 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4593 "00000000" // /* MW 15 */
+ 4594 "00000000" // /* MW 14 */
+ 4595 "01111000" // /* MW 13 */
+ 4596 "10100101" // /* MW 12 */
+ 4597 "00000001" // /* MW 11 */
+ 4598 "00000000" // /* MW 10 */
+ 4599 "00000000" // /* MW 9 */
+ 4600 "00000000" // /* MW 8 */
+ 4601 "01011011" // /* MW 7 */
+ 4602 "00000001" // /* MW 6 */
+ 4603 "00100000" // /* MW 5 */
+ 4604 "00000000" // /* MW 4 */
+ 4605 "11110000" // /* MW 3 */
+ 4606 "00101100" // /* MW 2 */
+ 4607 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4609 "00000000" // /* MW 15 */
+ 4610 "00000000" // /* MW 14 */
+ 4611 "01111000" // /* MW 13 */
+ 4612 "10100101" // /* MW 12 */
+ 4613 "00000001" // /* MW 11 */
+ 4614 "00000000" // /* MW 10 */
+ 4615 "00000000" // /* MW 9 */
+ 4616 "00000000" // /* MW 8 */
+ 4617 "00101110" // /* MW 7 */
+ 4618 "00110000" // /* MW 6 */
+ 4619 "00100010" // /* MW 5 */
+ 4620 "00000000" // /* MW 4 */
+ 4621 "11110000" // /* MW 3 */
+ 4622 "00101100" // /* MW 2 */
+ 4623 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.src_ref 3 "pad_3d.h" 514 39
+.loop_nesting 0
+ 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "11110000" // /* MW 5 */
+ 4626 "10111111" // /* MW 4 */
+ 4627 "11110011" // /* MW 3 */
+ 4628 "11111111" // /* MW 2 */
+ 4629 "01111111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 39 first
+ 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "01000100" // /* MW 3 */
+ 4632 "11001110" // /* MW 2 */
+ 4633 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 35
+ 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "01110001" // /* MW 3 */
+ 4636 "01001110" // /* MW 2 */
+ 4637 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+ 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4639 "00001111" // /* MW 3 */
+ 4640 "11001110" // /* MW 2 */
+ 4641 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 511 25 first
+ 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4643 "00101110" // /* MW 3 */
+ 4644 "00000100" // /* MW 2 */
+ 4645 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 36 first
+ 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4647 "01000001" // /* MW 3 */
+ 4648 "01001000" // /* MW 2 */
+ 4649 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 30 first
+ 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4651 "00001111" // /* MW 3 */
+ 4652 "10000100" // /* MW 2 */
+ 4653 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 28 first
+ 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4655 "00001111" // /* MW 3 */
+ 4656 "00000000" // /* MW 2 */
+ 4657 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 514 55
+.src_ref 3 "pad_3d.h" 517 39 first
+ 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4659 "00000101" // /* MW 5 */
+ 4660 "00100000" // /* MW 4 */
+ 4661 "11110011" // /* MW 3 */
+ 4662 "01000101" // /* MW 2 */
+ 4663 "00001000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21 first
+ 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4665 "01101101" // /* MW 3 */
+ 4666 "00000000" // /* MW 2 */
+ 4667 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22 first
+ 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4669 "00000010" // /* MW 5 */
+ 4670 "11000011" // /* MW 4 */
+ 4671 "00110100" // /* MW 3 */
+ 4672 "00000011" // /* MW 2 */
+ 4673 "11000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4675 "00000001" // /* MW 5 */
+ 4676 "01000000" // /* MW 4 */
+ 4677 "01110000" // /* MW 3 */
+ 4678 "00001001" // /* MW 2 */
+ 4679 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4681 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4683 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55 first
+.delay_slot
+ 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4685 "01101101" // /* MW 3 */
+ 4686 "11001000" // /* MW 2 */
+ 4687 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+.delay_slot
+ 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4689 "00001000" // /* MW 3 */
+ 4690 "00000010" // /* MW 2 */
+ 4691 "00011000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 37 first
+.delay_slot
+ 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4693 "01111111" // /* MW 3 */
+ 4694 "01000001" // /* MW 2 */
+ 4695 "00011000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4 first
+ 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4697 "00010000" // /* MW 9 */
+ 4698 "01101000" // /* MW 8 */
+ 4699 "01111001" // /* MW 7 */
+ 4700 "00000100" // /* MW 6 */
+ 4701 "00000000" // /* MW 5 */
+ 4702 "00000000" // /* MW 4 */
+ 4703 "10000000" // /* MW 3 */
+ 4704 "00000011" // /* MW 2 */
+ 4705 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4
+ 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4707 "00010000" // /* MW 9 */
+ 4708 "01101000" // /* MW 8 */
+ 4709 "10111001" // /* MW 7 */
+ 4710 "00000101" // /* MW 6 */
+ 4711 "00000000" // /* MW 5 */
+ 4712 "00000000" // /* MW 4 */
+ 4713 "10000000" // /* MW 3 */
+ 4714 "00000010" // /* MW 2 */
+ 4715 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4717 "10100000" // /* MW 3 */
+ 4718 "01110000" // /* MW 2 */
+ 4719 "00011101" // /* MW 1 */
+ 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4721 "00000000" // /* MW 15 */
+ 4722 "00000000" // /* MW 14 */
+ 4723 "01111000" // /* MW 13 */
+ 4724 "10100101" // /* MW 12 */
+ 4725 "00000001" // /* MW 11 */
+ 4726 "00000000" // /* MW 10 */
+ 4727 "00000000" // /* MW 9 */
+ 4728 "00000000" // /* MW 8 */
+ 4729 "01011011" // /* MW 7 */
+ 4730 "00000001" // /* MW 6 */
+ 4731 "00100000" // /* MW 5 */
+ 4732 "00000000" // /* MW 4 */
+ 4733 "11110000" // /* MW 3 */
+ 4734 "00101100" // /* MW 2 */
+ 4735 "00000000" // /* MW 1 */
+ 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4737 "00000000" // /* MW 15 */
+ 4738 "00000000" // /* MW 14 */
+ 4739 "01111000" // /* MW 13 */
+ 4740 "10100101" // /* MW 12 */
+ 4741 "00000001" // /* MW 11 */
+ 4742 "00000000" // /* MW 10 */
+ 4743 "00000000" // /* MW 9 */
+ 4744 "00000000" // /* MW 8 */
+ 4745 "01011011" // /* MW 7 */
+ 4746 "00000001" // /* MW 6 */
+ 4747 "00100000" // /* MW 5 */
+ 4748 "00000000" // /* MW 4 */
+ 4749 "11110000" // /* MW 3 */
+ 4750 "00101100" // /* MW 2 */
+ 4751 "00000000" // /* MW 1 */
+ 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4753 "00000000" // /* MW 15 */
+ 4754 "00000000" // /* MW 14 */
+ 4755 "01111000" // /* MW 13 */
+ 4756 "10100101" // /* MW 12 */
+ 4757 "00000001" // /* MW 11 */
+ 4758 "00000000" // /* MW 10 */
+ 4759 "00000000" // /* MW 9 */
+ 4760 "00000000" // /* MW 8 */
+ 4761 "01011011" // /* MW 7 */
+ 4762 "00000001" // /* MW 6 */
+ 4763 "00100000" // /* MW 5 */
+ 4764 "00000000" // /* MW 4 */
+ 4765 "11110000" // /* MW 3 */
+ 4766 "00101100" // /* MW 2 */
+ 4767 "00000000" // /* MW 1 */
+ 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4769 "00000000" // /* MW 15 */
+ 4770 "00000000" // /* MW 14 */
+ 4771 "01111000" // /* MW 13 */
+ 4772 "10100101" // /* MW 12 */
+ 4773 "00000001" // /* MW 11 */
+ 4774 "00000000" // /* MW 10 */
+ 4775 "00000000" // /* MW 9 */
+ 4776 "00000000" // /* MW 8 */
+ 4777 "01011011" // /* MW 7 */
+ 4778 "00000001" // /* MW 6 */
+ 4779 "00100000" // /* MW 5 */
+ 4780 "00000000" // /* MW 4 */
+ 4781 "11110000" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+ 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4785 "00000000" // /* MW 15 */
+ 4786 "00000000" // /* MW 14 */
+ 4787 "01111000" // /* MW 13 */
+ 4788 "10100101" // /* MW 12 */
+ 4789 "00000001" // /* MW 11 */
+ 4790 "00000000" // /* MW 10 */
+ 4791 "00000000" // /* MW 9 */
+ 4792 "00000000" // /* MW 8 */
+ 4793 "01011011" // /* MW 7 */
+ 4794 "00000001" // /* MW 6 */
+ 4795 "00100000" // /* MW 5 */
+ 4796 "00000000" // /* MW 4 */
+ 4797 "11110000" // /* MW 3 */
+ 4798 "00101100" // /* MW 2 */
+ 4799 "00000000" // /* MW 1 */
+ 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4801 "00000000" // /* MW 15 */
+ 4802 "00000000" // /* MW 14 */
+ 4803 "01111000" // /* MW 13 */
+ 4804 "10100101" // /* MW 12 */
+ 4805 "00000001" // /* MW 11 */
+ 4806 "00000000" // /* MW 10 */
+ 4807 "00000000" // /* MW 9 */
+ 4808 "00000000" // /* MW 8 */
+ 4809 "01011011" // /* MW 7 */
+ 4810 "00000001" // /* MW 6 */
+ 4811 "00100000" // /* MW 5 */
+ 4812 "00000000" // /* MW 4 */
+ 4813 "11110000" // /* MW 3 */
+ 4814 "00101100" // /* MW 2 */
+ 4815 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4817 "00000000" // /* MW 15 */
+ 4818 "00000000" // /* MW 14 */
+ 4819 "01111000" // /* MW 13 */
+ 4820 "10100101" // /* MW 12 */
+ 4821 "00000001" // /* MW 11 */
+ 4822 "00000000" // /* MW 10 */
+ 4823 "00000000" // /* MW 9 */
+ 4824 "00000000" // /* MW 8 */
+ 4825 "00101110" // /* MW 7 */
+ 4826 "00010000" // /* MW 6 */
+ 4827 "00100010" // /* MW 5 */
+ 4828 "00000000" // /* MW 4 */
+ 4829 "11110000" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.src_ref 3 "pad_3d.h" 282 first
+.loop_nesting 0
+ 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4833 "00000000" // /* MW 3 */
+ 4834 "00101000" // /* MW 2 */
+ 4835 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4843 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+ 4845 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30
+.src_ref 2 "reduce_base_c8.h" 362 first
+.src_ref 2 "reduce_base_c8.h" 365 18
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+.function_start
+ 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4849 "11000000" // /* MW 3 */
+ 4850 "11010100" // /* MW 2 */
+ 4851 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 365 18 first
+ 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4853 "00000000" // /* MW 7 */
+ 4854 "11001011" // /* MW 6 */
+ 4855 "10110000" // /* MW 5 */
+ 4856 "00000011" // /* MW 4 */
+ 4857 "01100000" // /* MW 3 */
+ 4858 "10010001" // /* MW 2 */
+ 4859 "01101011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 19 first
+ 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4861 "00011010" // /* MW 3 */
+ 4862 "10001100" // /* MW 2 */
+ 4863 "00000111" // /* MW 1 */
+ 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4865 "00000000" // /* MW 1 */
+ 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4867 "00000000" // /* MW 1 */
+ 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4869 "00000000" // /* MW 1 */
+ 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4871 "00000000" // /* MW 1 */
+ 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4873 "00000000" // /* MW 1 */
+ 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4875 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 12
+.src_ref 2 "reduce_base_c8.h" 367 19
+ 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4877 "00000001" // /* MW 5 */
+ 4878 "01000000" // /* MW 4 */
+ 4879 "11110000" // /* MW 3 */
+ 4880 "00001001" // /* MW 2 */
+ 4881 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18
+.src_ref 5 "broadcast.hpp" 80 25
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 2 "reduce_base_c8.h" 372 34
+.delay_slot
+ 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4883 "00000001" // /* MW 3 */
+ 4884 "00100000" // /* MW 2 */
+ 4885 "00010000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.delay_slot
+ 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4887 "01110010" // /* MW 3 */
+ 4888 "11000010" // /* MW 2 */
+ 4889 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 362
+.delay_slot
+ 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4895 "00000001" // /* MW 5 */
+ 4896 "00000000" // /* MW 4 */
+ 4897 "00000000" // /* MW 3 */
+ 4898 "00100000" // /* MW 2 */
+ 4899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43
+ 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4901 "01001000" // /* MW 3 */
+ 4902 "10000000" // /* MW 2 */
+ 4903 "00011010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43 first
+ 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00110110" // /* MW 3 */
+ 4906 "01000000" // /* MW 2 */
+ 4907 "00000010" // /* MW 1 */
+ 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4909 "00000000" // /* MW 1 */
+ 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4911 "00000000" // /* MW 1 */
+ 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4913 "00000000" // /* MW 1 */
+ 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4915 "00000000" // /* MW 1 */
+ 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4917 "00000000" // /* MW 1 */
+ 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4919 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 34
+ 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4921 "00011001" // /* MW 3 */
+ 4922 "00000100" // /* MW 2 */
+ 4923 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4925 "00000001" // /* MW 5 */
+ 4926 "01000000" // /* MW 4 */
+ 4927 "11110000" // /* MW 3 */
+ 4928 "00001001" // /* MW 2 */
+ 4929 "00010000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 374 29
+.delay_slot
+ 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4931 "10010010" // /* MW 3 */
+ 4932 "00000010" // /* MW 2 */
+ 4933 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4941 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 372 12
+.src_ref 2 "reduce_base_c8.h" 374 29
+ 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4943 "00010000" // /* MW 11 */
+ 4944 "11101000" // /* MW 10 */
+ 4945 "01111001" // /* MW 9 */
+ 4946 "00000100" // /* MW 8 */
+ 4947 "00000000" // /* MW 7 */
+ 4948 "00000000" // /* MW 6 */
+ 4949 "10001011" // /* MW 5 */
+ 4950 "10000100" // /* MW 4 */
+ 4951 "11110011" // /* MW 3 */
+ 4952 "00101100" // /* MW 2 */
+ 4953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4955 "10100000" // /* MW 5 */
+ 4956 "11100111" // /* MW 4 */
+ 4957 "00010110" // /* MW 3 */
+ 4958 "00000000" // /* MW 2 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4961 "00000000" // /* MW 15 */
+ 4962 "00000000" // /* MW 14 */
+ 4963 "01111000" // /* MW 13 */
+ 4964 "01010000" // /* MW 12 */
+ 4965 "10111000" // /* MW 11 */
+ 4966 "00000010" // /* MW 10 */
+ 4967 "00000000" // /* MW 9 */
+ 4968 "00000000" // /* MW 8 */
+ 4969 "01011011" // /* MW 7 */
+ 4970 "00000001" // /* MW 6 */
+ 4971 "00100000" // /* MW 5 */
+ 4972 "00000000" // /* MW 4 */
+ 4973 "11110000" // /* MW 3 */
+ 4974 "00101100" // /* MW 2 */
+ 4975 "00000000" // /* MW 1 */
+ 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4977 "00000000" // /* MW 15 */
+ 4978 "00000000" // /* MW 14 */
+ 4979 "01111000" // /* MW 13 */
+ 4980 "10100101" // /* MW 12 */
+ 4981 "00000001" // /* MW 11 */
+ 4982 "00000000" // /* MW 10 */
+ 4983 "00000000" // /* MW 9 */
+ 4984 "00000000" // /* MW 8 */
+ 4985 "01011011" // /* MW 7 */
+ 4986 "00000001" // /* MW 6 */
+ 4987 "00100000" // /* MW 5 */
+ 4988 "00000000" // /* MW 4 */
+ 4989 "11110000" // /* MW 3 */
+ 4990 "00101100" // /* MW 2 */
+ 4991 "00000000" // /* MW 1 */
+ 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4993 "00000000" // /* MW 15 */
+ 4994 "00000000" // /* MW 14 */
+ 4995 "01111000" // /* MW 13 */
+ 4996 "10100101" // /* MW 12 */
+ 4997 "00000001" // /* MW 11 */
+ 4998 "00000000" // /* MW 10 */
+ 4999 "00000000" // /* MW 9 */
+ 5000 "00000000" // /* MW 8 */
+ 5001 "01011011" // /* MW 7 */
+ 5002 "00000001" // /* MW 6 */
+ 5003 "00100000" // /* MW 5 */
+ 5004 "00000000" // /* MW 4 */
+ 5005 "11110000" // /* MW 3 */
+ 5006 "00101100" // /* MW 2 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5009 "00000000" // /* MW 15 */
+ 5010 "00000000" // /* MW 14 */
+ 5011 "01111000" // /* MW 13 */
+ 5012 "10100101" // /* MW 12 */
+ 5013 "00000001" // /* MW 11 */
+ 5014 "00000000" // /* MW 10 */
+ 5015 "00000000" // /* MW 9 */
+ 5016 "00000000" // /* MW 8 */
+ 5017 "01011011" // /* MW 7 */
+ 5018 "00000001" // /* MW 6 */
+ 5019 "00100000" // /* MW 5 */
+ 5020 "00000000" // /* MW 4 */
+ 5021 "11110000" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+ 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5025 "00000000" // /* MW 15 */
+ 5026 "00000000" // /* MW 14 */
+ 5027 "01111000" // /* MW 13 */
+ 5028 "10100101" // /* MW 12 */
+ 5029 "00000001" // /* MW 11 */
+ 5030 "00000000" // /* MW 10 */
+ 5031 "00000000" // /* MW 9 */
+ 5032 "00000000" // /* MW 8 */
+ 5033 "01011011" // /* MW 7 */
+ 5034 "00000001" // /* MW 6 */
+ 5035 "00100000" // /* MW 5 */
+ 5036 "00000000" // /* MW 4 */
+ 5037 "11110000" // /* MW 3 */
+ 5038 "00101100" // /* MW 2 */
+ 5039 "00000000" // /* MW 1 */
+ 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5041 "00000000" // /* MW 15 */
+ 5042 "00000000" // /* MW 14 */
+ 5043 "01111000" // /* MW 13 */
+ 5044 "10100101" // /* MW 12 */
+ 5045 "00000001" // /* MW 11 */
+ 5046 "00000000" // /* MW 10 */
+ 5047 "00000000" // /* MW 9 */
+ 5048 "00000000" // /* MW 8 */
+ 5049 "01011011" // /* MW 7 */
+ 5050 "00000001" // /* MW 6 */
+ 5051 "00100000" // /* MW 5 */
+ 5052 "00000000" // /* MW 4 */
+ 5053 "11110000" // /* MW 3 */
+ 5054 "00101100" // /* MW 2 */
+ 5055 "00000000" // /* MW 1 */
+ 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5057 "00000000" // /* MW 15 */
+ 5058 "00000000" // /* MW 14 */
+ 5059 "01111000" // /* MW 13 */
+ 5060 "10100101" // /* MW 12 */
+ 5061 "00000001" // /* MW 11 */
+ 5062 "00000000" // /* MW 10 */
+ 5063 "00000000" // /* MW 9 */
+ 5064 "00000000" // /* MW 8 */
+ 5065 "01011011" // /* MW 7 */
+ 5066 "00000001" // /* MW 6 */
+ 5067 "00100000" // /* MW 5 */
+ 5068 "00000000" // /* MW 4 */
+ 5069 "11110000" // /* MW 3 */
+ 5070 "00101100" // /* MW 2 */
+ 5071 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 2 "reduce_base_c8.h" 374 29 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5073 "00000000" // /* MW 15 */
+ 5074 "00000000" // /* MW 14 */
+ 5075 "01111000" // /* MW 13 */
+ 5076 "10100101" // /* MW 12 */
+ 5077 "00000001" // /* MW 11 */
+ 5078 "00000000" // /* MW 10 */
+ 5079 "00000000" // /* MW 9 */
+ 5080 "10000000" // /* MW 8 */
+ 5081 "00000110" // /* MW 7 */
+ 5082 "00011101" // /* MW 6 */
+ 5083 "00100011" // /* MW 5 */
+ 5084 "00000000" // /* MW 4 */
+ 5085 "11110000" // /* MW 3 */
+ 5086 "00101100" // /* MW 2 */
+ 5087 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.loop_nesting 0
+ 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5089 "01000000" // /* MW 3 */
+ 5090 "00000000" // /* MW 2 */
+ 5091 "00011100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+ 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5093 "00111010" // /* MW 3 */
+ 5094 "10001010" // /* MW 2 */
+ 5095 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 388 28
+ 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5097 "00011001" // /* MW 5 */
+ 5098 "00011111" // /* MW 4 */
+ 5099 "01011010" // /* MW 3 */
+ 5100 "11011010" // /* MW 2 */
+ 5101 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+ 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5103 "10111001" // /* MW 5 */
+ 5104 "00000000" // /* MW 4 */
+ 5105 "01010001" // /* MW 3 */
+ 5106 "01101011" // /* MW 2 */
+ 5107 "11110101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5109 "10000001" // /* MW 5 */
+ 5110 "10111101" // /* MW 4 */
+ 5111 "01011001" // /* MW 3 */
+ 5112 "01010010" // /* MW 2 */
+ 5113 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 594 43 first
+ 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5115 "00111000" // /* MW 5 */
+ 5116 "11010011" // /* MW 4 */
+ 5117 "01010110" // /* MW 3 */
+ 5118 "01001110" // /* MW 2 */
+ 5119 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5121 "10110010" // /* MW 3 */
+ 5122 "11011110" // /* MW 2 */
+ 5123 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 64
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5125 "10011010" // /* MW 3 */
+ 5126 "11111111" // /* MW 2 */
+ 5127 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 56 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5129 "00010010" // /* MW 5 */
+ 5130 "00011100" // /* MW 4 */
+ 5131 "01010000" // /* MW 3 */
+ 5132 "11000110" // /* MW 2 */
+ 5133 "01100111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 596 56 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5135 "01011000" // /* MW 9 */
+ 5136 "00000000" // /* MW 8 */
+ 5137 "01100000" // /* MW 7 */
+ 5138 "11001010" // /* MW 6 */
+ 5139 "00100111" // /* MW 5 */
+ 5140 "00111111" // /* MW 4 */
+ 5141 "01010000" // /* MW 3 */
+ 5142 "11001010" // /* MW 2 */
+ 5143 "01111110" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5145 "01011000" // /* MW 11 */
+ 5146 "00000001" // /* MW 10 */
+ 5147 "11001000" // /* MW 9 */
+ 5148 "01101100" // /* MW 8 */
+ 5149 "00101001" // /* MW 7 */
+ 5150 "00100011" // /* MW 6 */
+ 5151 "01001011" // /* MW 5 */
+ 5152 "00010000" // /* MW 4 */
+ 5153 "01010010" // /* MW 3 */
+ 5154 "00011110" // /* MW 2 */
+ 5155 "11100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 75 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5157 "01011000" // /* MW 11 */
+ 5158 "00111100" // /* MW 10 */
+ 5159 "01001000" // /* MW 9 */
+ 5160 "11101100" // /* MW 8 */
+ 5161 "01110011" // /* MW 7 */
+ 5162 "00101100" // /* MW 6 */
+ 5163 "00001011" // /* MW 5 */
+ 5164 "01011010" // /* MW 4 */
+ 5165 "01010010" // /* MW 3 */
+ 5166 "11101111" // /* MW 2 */
+ 5167 "01100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5169 "01111000" // /* MW 11 */
+ 5170 "11010000" // /* MW 10 */
+ 5171 "00000001" // /* MW 9 */
+ 5172 "01101101" // /* MW 8 */
+ 5173 "01000011" // /* MW 7 */
+ 5174 "00101001" // /* MW 6 */
+ 5175 "10001011" // /* MW 5 */
+ 5176 "10000100" // /* MW 4 */
+ 5177 "10000011" // /* MW 3 */
+ 5178 "00001010" // /* MW 2 */
+ 5179 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5181 "01111000" // /* MW 9 */
+ 5182 "00010000" // /* MW 8 */
+ 5183 "10000101" // /* MW 7 */
+ 5184 "01101110" // /* MW 6 */
+ 5185 "00110011" // /* MW 5 */
+ 5186 "00100111" // /* MW 4 */
+ 5187 "10110000" // /* MW 3 */
+ 5188 "00010010" // /* MW 2 */
+ 5189 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5191 "01111000" // /* MW 9 */
+ 5192 "11010000" // /* MW 8 */
+ 5193 "00000100" // /* MW 7 */
+ 5194 "01101111" // /* MW 6 */
+ 5195 "00110011" // /* MW 5 */
+ 5196 "00101011" // /* MW 4 */
+ 5197 "00110000" // /* MW 3 */
+ 5198 "01000001" // /* MW 2 */
+ 5199 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5201 "11111110" // /* MW 5 */
+ 5202 "11110010" // /* MW 4 */
+ 5203 "10111010" // /* MW 3 */
+ 5204 "01001101" // /* MW 2 */
+ 5205 "10001100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5207 "01000001" // /* MW 5 */
+ 5208 "00010001" // /* MW 4 */
+ 5209 "10110001" // /* MW 3 */
+ 5210 "01001101" // /* MW 2 */
+ 5211 "10010100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5213 "01111000" // /* MW 11 */
+ 5214 "11010000" // /* MW 10 */
+ 5215 "00000100" // /* MW 9 */
+ 5216 "01101100" // /* MW 8 */
+ 5217 "01100011" // /* MW 7 */
+ 5218 "00001110" // /* MW 6 */
+ 5219 "01001011" // /* MW 5 */
+ 5220 "00010000" // /* MW 4 */
+ 5221 "00110000" // /* MW 3 */
+ 5222 "00000001" // /* MW 2 */
+ 5223 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+ 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5225 "00111101" // /* MW 9 */
+ 5226 "00110000" // /* MW 8 */
+ 5227 "00010100" // /* MW 7 */
+ 5228 "11100100" // /* MW 6 */
+ 5229 "00100000" // /* MW 5 */
+ 5230 "00000011" // /* MW 4 */
+ 5231 "01100111" // /* MW 3 */
+ 5232 "10000001" // /* MW 2 */
+ 5233 "00001011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5235 "01110010" // /* MW 9 */
+ 5236 "01010000" // /* MW 8 */
+ 5237 "01000100" // /* MW 7 */
+ 5238 "00000010" // /* MW 6 */
+ 5239 "00001011" // /* MW 5 */
+ 5240 "01011011" // /* MW 4 */
+ 5241 "00110100" // /* MW 3 */
+ 5242 "00100001" // /* MW 2 */
+ 5243 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5245 "00000001" // /* MW 5 */
+ 5246 "10010011" // /* MW 4 */
+ 5247 "00110011" // /* MW 3 */
+ 5248 "00110001" // /* MW 2 */
+ 5249 "00000011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5251 "00111101" // /* MW 7 */
+ 5252 "10000000" // /* MW 6 */
+ 5253 "00010001" // /* MW 5 */
+ 5254 "00000100" // /* MW 4 */
+ 5255 "00110000" // /* MW 3 */
+ 5256 "01000001" // /* MW 2 */
+ 5257 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5259 "10010101" // /* MW 3 */
+ 5260 "01010000" // /* MW 2 */
+ 5261 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5265 "00111101" // /* MW 9 */
+ 5266 "00101000" // /* MW 8 */
+ 5267 "00010000" // /* MW 7 */
+ 5268 "00000010" // /* MW 6 */
+ 5269 "01001100" // /* MW 5 */
+ 5270 "10001111" // /* MW 4 */
+ 5271 "00000000" // /* MW 3 */
+ 5272 "00000000" // /* MW 2 */
+ 5273 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5275 "00000001" // /* MW 5 */
+ 5276 "00010000" // /* MW 4 */
+ 5277 "00110111" // /* MW 3 */
+ 5278 "00000001" // /* MW 2 */
+ 5279 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5281 "10000001" // /* MW 15 */
+ 5282 "10100001" // /* MW 14 */
+ 5283 "01111000" // /* MW 13 */
+ 5284 "00000000" // /* MW 12 */
+ 5285 "10000010" // /* MW 11 */
+ 5286 "00001000" // /* MW 10 */
+ 5287 "01000100" // /* MW 9 */
+ 5288 "00000000" // /* MW 8 */
+ 5289 "10001011" // /* MW 7 */
+ 5290 "10000100" // /* MW 6 */
+ 5291 "00100100" // /* MW 5 */
+ 5292 "00000000" // /* MW 4 */
+ 5293 "10000000" // /* MW 3 */
+ 5294 "00000110" // /* MW 2 */
+ 5295 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5297 "01100001" // /* MW 15 */
+ 5298 "10010000" // /* MW 14 */
+ 5299 "00010000" // /* MW 13 */
+ 5300 "10010000" // /* MW 12 */
+ 5301 "10111010" // /* MW 11 */
+ 5302 "00000101" // /* MW 10 */
+ 5303 "00000000" // /* MW 9 */
+ 5304 "00000000" // /* MW 8 */
+ 5305 "00001011" // /* MW 7 */
+ 5306 "01011010" // /* MW 6 */
+ 5307 "00100001" // /* MW 5 */
+ 5308 "00000000" // /* MW 4 */
+ 5309 "00110000" // /* MW 3 */
+ 5310 "00100001" // /* MW 2 */
+ 5311 "00011101" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5313 "10001001" // /* MW 3 */
+ 5314 "00011001" // /* MW 2 */
+ 5315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5317 "00111101" // /* MW 11 */
+ 5318 "10000000" // /* MW 10 */
+ 5319 "00010001" // /* MW 9 */
+ 5320 "10001110" // /* MW 8 */
+ 5321 "10101101" // /* MW 7 */
+ 5322 "00000000" // /* MW 6 */
+ 5323 "00100000" // /* MW 5 */
+ 5324 "00000000" // /* MW 4 */
+ 5325 "10110000" // /* MW 3 */
+ 5326 "00010010" // /* MW 2 */
+ 5327 "01101010" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5329 "00000000" // /* MW 15 */
+ 5330 "00000000" // /* MW 14 */
+ 5331 "01111000" // /* MW 13 */
+ 5332 "10100101" // /* MW 12 */
+ 5333 "00000001" // /* MW 11 */
+ 5334 "00000000" // /* MW 10 */
+ 5335 "00000000" // /* MW 9 */
+ 5336 "00000000" // /* MW 8 */
+ 5337 "01011011" // /* MW 7 */
+ 5338 "00000001" // /* MW 6 */
+ 5339 "00100000" // /* MW 5 */
+ 5340 "00000000" // /* MW 4 */
+ 5341 "00110000" // /* MW 3 */
+ 5342 "01000001" // /* MW 2 */
+ 5343 "00010101" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5345 "00000000" // /* MW 15 */
+ 5346 "00000000" // /* MW 14 */
+ 5347 "01111000" // /* MW 13 */
+ 5348 "10100101" // /* MW 12 */
+ 5349 "00000001" // /* MW 11 */
+ 5350 "00000000" // /* MW 10 */
+ 5351 "00000000" // /* MW 9 */
+ 5352 "00000000" // /* MW 8 */
+ 5353 "01011011" // /* MW 7 */
+ 5354 "00000001" // /* MW 6 */
+ 5355 "00100000" // /* MW 5 */
+ 5356 "00000000" // /* MW 4 */
+ 5357 "11110000" // /* MW 3 */
+ 5358 "00101100" // /* MW 2 */
+ 5359 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5361 "01000001" // /* MW 15 */
+ 5362 "10000001" // /* MW 14 */
+ 5363 "01111000" // /* MW 13 */
+ 5364 "10100101" // /* MW 12 */
+ 5365 "00000001" // /* MW 11 */
+ 5366 "00000000" // /* MW 10 */
+ 5367 "00000000" // /* MW 9 */
+ 5368 "00000000" // /* MW 8 */
+ 5369 "01011011" // /* MW 7 */
+ 5370 "00000001" // /* MW 6 */
+ 5371 "00100000" // /* MW 5 */
+ 5372 "00000000" // /* MW 4 */
+ 5373 "11110000" // /* MW 3 */
+ 5374 "00101100" // /* MW 2 */
+ 5375 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5377 "00000000" // /* MW 15 */
+ 5378 "00000000" // /* MW 14 */
+ 5379 "01111000" // /* MW 13 */
+ 5380 "10100101" // /* MW 12 */
+ 5381 "00000001" // /* MW 11 */
+ 5382 "00000000" // /* MW 10 */
+ 5383 "00000000" // /* MW 9 */
+ 5384 "10000000" // /* MW 8 */
+ 5385 "00000110" // /* MW 7 */
+ 5386 "00110001" // /* MW 6 */
+ 5387 "00100100" // /* MW 5 */
+ 5388 "00000000" // /* MW 4 */
+ 5389 "00110000" // /* MW 3 */
+ 5390 "00000001" // /* MW 2 */
+ 5391 "00011001" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5393 "10000001" // /* MW 15 */
+ 5394 "10100001" // /* MW 14 */
+ 5395 "01111000" // /* MW 13 */
+ 5396 "10100101" // /* MW 12 */
+ 5397 "00000001" // /* MW 11 */
+ 5398 "00000000" // /* MW 10 */
+ 5399 "00000000" // /* MW 9 */
+ 5400 "00000000" // /* MW 8 */
+ 5401 "01011011" // /* MW 7 */
+ 5402 "00000001" // /* MW 6 */
+ 5403 "00100000" // /* MW 5 */
+ 5404 "00000000" // /* MW 4 */
+ 5405 "11110000" // /* MW 3 */
+ 5406 "00101100" // /* MW 2 */
+ 5407 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5409 "01100001" // /* MW 15 */
+ 5410 "10010000" // /* MW 14 */
+ 5411 "01111000" // /* MW 13 */
+ 5412 "10100101" // /* MW 12 */
+ 5413 "00000001" // /* MW 11 */
+ 5414 "00000000" // /* MW 10 */
+ 5415 "00000000" // /* MW 9 */
+ 5416 "00000000" // /* MW 8 */
+ 5417 "01011011" // /* MW 7 */
+ 5418 "00000001" // /* MW 6 */
+ 5419 "00100000" // /* MW 5 */
+ 5420 "00000000" // /* MW 4 */
+ 5421 "00110000" // /* MW 3 */
+ 5422 "00100001" // /* MW 2 */
+ 5423 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 107 23
+.src_ref 2 "reduce_base_c8.h" 412 41 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5425 "00010000" // /* MW 9 */
+ 5426 "11000000" // /* MW 8 */
+ 5427 "10101111" // /* MW 7 */
+ 5428 "00001100" // /* MW 6 */
+ 5429 "00000000" // /* MW 5 */
+ 5430 "00000000" // /* MW 4 */
+ 5431 "01010000" // /* MW 3 */
+ 5432 "00000111" // /* MW 2 */
+ 5433 "11101100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 6 "me_vmult_float_emulated.h" 107 23 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5435 "00111101" // /* MW 9 */
+ 5436 "10000000" // /* MW 8 */
+ 5437 "00010001" // /* MW 7 */
+ 5438 "11100010" // /* MW 6 */
+ 5439 "01110010" // /* MW 5 */
+ 5440 "00010101" // /* MW 4 */
+ 5441 "00110010" // /* MW 3 */
+ 5442 "00110001" // /* MW 2 */
+ 5443 "00000011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5445 "01110010" // /* MW 3 */
+ 5446 "01000001" // /* MW 2 */
+ 5447 "00011000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5449 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5451 "00111101" // /* MW 3 */
+ 5452 "00101000" // /* MW 2 */
+ 5453 "00010000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5455 "00000110" // /* MW 3 */
+ 5456 "00110001" // /* MW 2 */
+ 5457 "00001100" // /* MW 1 */
+ 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5459 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 412 52 first
+ 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5461 "00111101" // /* MW 7 */
+ 5462 "00001100" // /* MW 6 */
+ 5463 "00010010" // /* MW 5 */
+ 5464 "11111001" // /* MW 4 */
+ 5465 "01011111" // /* MW 3 */
+ 5466 "00000010" // /* MW 2 */
+ 5467 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 31
+ 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5469 "00001000" // /* MW 3 */
+ 5470 "01000000" // /* MW 2 */
+ 5471 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 16
+ 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */
+ 5473 "00000001" // /* MW 5 */
+ 5474 "01000000" // /* MW 4 */
+ 5475 "01110000" // /* MW 3 */
+ 5476 "00001100" // /* MW 2 */
+ 5477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5483 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.delay_slot
+ 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5485 "00000110" // /* MW 3 */
+ 5486 "00110001" // /* MW 2 */
+ 5487 "00001100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5489 "00000000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5491 "00011010" // /* MW 5 */
+ 5492 "00010100" // /* MW 4 */
+ 5493 "11010000" // /* MW 3 */
+ 5494 "10011010" // /* MW 2 */
+ 5495 "01000110" // /* MW 1 */
+ 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5497 "00000000" // /* MW 1 */
+ 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5499 "00000000" // /* MW 1 */
+ 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5501 "00000000" // /* MW 1 */
+ 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5503 "00000000" // /* MW 1 */
+ 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5505 "00000000" // /* MW 1 */
+ 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5507 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5509 "01101001" // /* MW 3 */
+ 5510 "01001110" // /* MW 2 */
+ 5511 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */
+ 5513 "00000001" // /* MW 5 */
+ 5514 "01000000" // /* MW 4 */
+ 5515 "01000000" // /* MW 3 */
+ 5516 "00001110" // /* MW 2 */
+ 5517 "00111000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5519 "00010001" // /* MW 3 */
+ 5520 "00000000" // /* MW 2 */
+ 5521 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5529 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5531 "00001000" // /* MW 3 */
+ 5532 "10001010" // /* MW 2 */
+ 5533 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */
+ 5535 "00000001" // /* MW 5 */
+ 5536 "01000000" // /* MW 4 */
+ 5537 "10111000" // /* MW 3 */
+ 5538 "00001100" // /* MW 2 */
+ 5539 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5549 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5551 "11000001" // /* MW 5 */
+ 5552 "10000011" // /* MW 4 */
+ 5553 "10101001" // /* MW 3 */
+ 5554 "01000000" // /* MW 2 */
+ 5555 "00100100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5557 "11111110" // /* MW 5 */
+ 5558 "10111111" // /* MW 4 */
+ 5559 "11111010" // /* MW 3 */
+ 5560 "00000000" // /* MW 2 */
+ 5561 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+ 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5563 "00000010" // /* MW 5 */
+ 5564 "01010000" // /* MW 4 */
+ 5565 "11110000" // /* MW 3 */
+ 5566 "00101100" // /* MW 2 */
+ 5567 "00000000" // /* MW 1 */
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5569 "01011000" // /* MW 11 */
+ 5570 "00111100" // /* MW 10 */
+ 5571 "01001000" // /* MW 9 */
+ 5572 "00001000" // /* MW 8 */
+ 5573 "01010010" // /* MW 7 */
+ 5574 "00000000" // /* MW 6 */
+ 5575 "00001011" // /* MW 5 */
+ 5576 "10000011" // /* MW 4 */
+ 5577 "10000010" // /* MW 3 */
+ 5578 "00001010" // /* MW 2 */
+ 5579 "00001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+ 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5581 "00000010" // /* MW 5 */
+ 5582 "00010001" // /* MW 4 */
+ 5583 "01010000" // /* MW 3 */
+ 5584 "00011010" // /* MW 2 */
+ 5585 "01001000" // /* MW 1 */
+ 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5587 "00000000" // /* MW 1 */
+ 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5589 "00000000" // /* MW 1 */
+ 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5591 "00000000" // /* MW 1 */
+ 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5593 "00000000" // /* MW 1 */
+ 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5595 "00000000" // /* MW 1 */
+ 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5597 "01100111" // /* MW 3 */
+ 5598 "00000001" // /* MW 2 */
+ 5599 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+ 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5601 "00000000" // /* MW 15 */
+ 5602 "00000000" // /* MW 14 */
+ 5603 "01111000" // /* MW 13 */
+ 5604 "10100101" // /* MW 12 */
+ 5605 "00000001" // /* MW 11 */
+ 5606 "11110100" // /* MW 10 */
+ 5607 "01010010" // /* MW 9 */
+ 5608 "00001100" // /* MW 8 */
+ 5609 "01011011" // /* MW 7 */
+ 5610 "00000001" // /* MW 6 */
+ 5611 "00100000" // /* MW 5 */
+ 5612 "00000000" // /* MW 4 */
+ 5613 "11110000" // /* MW 3 */
+ 5614 "00101100" // /* MW 2 */
+ 5615 "00000000" // /* MW 1 */
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5617 "00010000" // /* MW 11 */
+ 5618 "01111000" // /* MW 10 */
+ 5619 "10110010" // /* MW 9 */
+ 5620 "11110011" // /* MW 8 */
+ 5621 "00000001" // /* MW 7 */
+ 5622 "10000000" // /* MW 6 */
+ 5623 "10100101" // /* MW 5 */
+ 5624 "11111101" // /* MW 4 */
+ 5625 "10000111" // /* MW 3 */
+ 5626 "10001010" // /* MW 2 */
+ 5627 "00000100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16
+ 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5629 "01111000" // /* MW 11 */
+ 5630 "00111001" // /* MW 10 */
+ 5631 "10001011" // /* MW 9 */
+ 5632 "00001000" // /* MW 8 */
+ 5633 "01010000" // /* MW 7 */
+ 5634 "10000000" // /* MW 6 */
+ 5635 "01100101" // /* MW 5 */
+ 5636 "11111010" // /* MW 4 */
+ 5637 "01010111" // /* MW 3 */
+ 5638 "11011100" // /* MW 2 */
+ 5639 "11100000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1289 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+ 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5641 "01111000" // /* MW 11 */
+ 5642 "01001001" // /* MW 10 */
+ 5643 "00000010" // /* MW 9 */
+ 5644 "11101000" // /* MW 8 */
+ 5645 "01100111" // /* MW 7 */
+ 5646 "00111111" // /* MW 6 */
+ 5647 "10001011" // /* MW 5 */
+ 5648 "10000100" // /* MW 4 */
+ 5649 "11010111" // /* MW 3 */
+ 5650 "00011010" // /* MW 2 */
+ 5651 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 1280 49
+ 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5653 "01111000" // /* MW 9 */
+ 5654 "01001001" // /* MW 8 */
+ 5655 "00000010" // /* MW 7 */
+ 5656 "00000001" // /* MW 6 */
+ 5657 "11010010" // /* MW 5 */
+ 5658 "00000010" // /* MW 4 */
+ 5659 "00000000" // /* MW 3 */
+ 5660 "11111000" // /* MW 2 */
+ 5661 "00000011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first
+ 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5663 "00010000" // /* MW 9 */
+ 5664 "01000000" // /* MW 8 */
+ 5665 "01111011" // /* MW 7 */
+ 5666 "00000100" // /* MW 6 */
+ 5667 "00000000" // /* MW 5 */
+ 5668 "00000000" // /* MW 4 */
+ 5669 "00000000" // /* MW 3 */
+ 5670 "00011001" // /* MW 2 */
+ 5671 "00000010" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+ 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00010000" // /* MW 9 */
+ 5674 "01100000" // /* MW 8 */
+ 5675 "10111100" // /* MW 7 */
+ 5676 "00000101" // /* MW 6 */
+ 5677 "00000000" // /* MW 5 */
+ 5678 "00000000" // /* MW 4 */
+ 5679 "10110000" // /* MW 3 */
+ 5680 "10010100" // /* MW 2 */
+ 5681 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98
+ 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5683 "00000001" // /* MW 3 */
+ 5684 "01110100" // /* MW 2 */
+ 5685 "00010000" // /* MW 1 */
+ 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5687 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1286 72
+.src_ref 7 "accum.hpp" 1108 103
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5689 "10000000" // /* MW 3 */
+ 5690 "11111010" // /* MW 2 */
+ 5691 "00010101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5693 "00000000" // /* MW 7 */
+ 5694 "10000000" // /* MW 6 */
+ 5695 "10111001" // /* MW 5 */
+ 5696 "00000010" // /* MW 4 */
+ 5697 "11000000" // /* MW 3 */
+ 5698 "00000010" // /* MW 2 */
+ 5699 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5701 "10010010" // /* MW 3 */
+ 5702 "10100000" // /* MW 2 */
+ 5703 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5705 "10000011" // /* MW 7 */
+ 5706 "01000000" // /* MW 6 */
+ 5707 "00010000" // /* MW 5 */
+ 5708 "11100110" // /* MW 4 */
+ 5709 "10010010" // /* MW 3 */
+ 5710 "10100110" // /* MW 2 */
+ 5711 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5713 "10010010" // /* MW 3 */
+ 5714 "00101010" // /* MW 2 */
+ 5715 "00011011" // /* MW 1 */
+ 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5717 "00000000" // /* MW 1 */
+ 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5719 "00000000" // /* MW 1 */
+ 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5721 "00000000" // /* MW 1 */
+ 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5723 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5725 "00010110" // /* MW 3 */
+ 5726 "11000000" // /* MW 2 */
+ 5727 "00001001" // /* MW 1 */
+ 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5729 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5731 "10000011" // /* MW 3 */
+ 5732 "00000110" // /* MW 2 */
+ 5733 "00010000" // /* MW 1 */
+ 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5735 "00000000" // /* MW 1 */
+ 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5737 "00000000" // /* MW 1 */
+ 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5739 "00000000" // /* MW 1 */
+ 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5741 "00000000" // /* MW 1 */
+ 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5743 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+ 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00000000" // /* MW 15 */
+ 5746 "00000000" // /* MW 14 */
+ 5747 "01111000" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00001000" // /* MW 10 */
+ 5751 "01110001" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "00010110" // /* MW 7 */
+ 5754 "11000000" // /* MW 6 */
+ 5755 "00100010" // /* MW 5 */
+ 5756 "00000000" // /* MW 4 */
+ 5757 "11110000" // /* MW 3 */
+ 5758 "00101100" // /* MW 2 */
+ 5759 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first
+.begin_of_loop
+.loop_nesting 1
+ 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5761 "00100101" // /* MW 5 */
+ 5762 "10100101" // /* MW 4 */
+ 5763 "10001001" // /* MW 3 */
+ 5764 "10111110" // /* MW 2 */
+ 5765 "00100011" // /* MW 1 */
+ 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5767 "10010010" // /* MW 3 */
+ 5768 "11010110" // /* MW 2 */
+ 5769 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49
+ 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5771 "11000000" // /* MW 3 */
+ 5772 "00011110" // /* MW 2 */
+ 5773 "00011111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49 first
+ 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5775 "10000100" // /* MW 3 */
+ 5776 "00111011" // /* MW 2 */
+ 5777 "00010111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1285 72 first
+ 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5779 "11100000" // /* MW 5 */
+ 5780 "00111101" // /* MW 4 */
+ 5781 "01011110" // /* MW 3 */
+ 5782 "11001001" // /* MW 2 */
+ 5783 "11101110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+ 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5785 "11011101" // /* MW 3 */
+ 5786 "10111101" // /* MW 2 */
+ 5787 "00010101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98 first
+ 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5789 "11010001" // /* MW 3 */
+ 5790 "10111111" // /* MW 2 */
+ 5791 "00010110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "add_reduce.hpp" 322 47 first
+ 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5793 "11001101" // /* MW 5 */
+ 5794 "01110000" // /* MW 4 */
+ 5795 "01001000" // /* MW 3 */
+ 5796 "10111100" // /* MW 2 */
+ 5797 "00101111" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+ 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5799 "10010010" // /* MW 3 */
+ 5800 "00010000" // /* MW 2 */
+ 5801 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 7 "accum.hpp" 198 120
+ 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5803 "00100010" // /* MW 3 */
+ 5804 "01001110" // /* MW 2 */
+ 5805 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 7 "accum.hpp" 198 120 first
+ 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5807 "00100010" // /* MW 3 */
+ 5808 "01001111" // /* MW 2 */
+ 5809 "00011101" // /* MW 1 */
+ 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5811 "10010010" // /* MW 3 */
+ 5812 "10010000" // /* MW 2 */
+ 5813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5815 "10010010" // /* MW 3 */
+ 5816 "10010100" // /* MW 2 */
+ 5817 "00011011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5819 "00111101" // /* MW 7 */
+ 5820 "00101000" // /* MW 6 */
+ 5821 "00010011" // /* MW 5 */
+ 5822 "11100110" // /* MW 4 */
+ 5823 "10001010" // /* MW 3 */
+ 5824 "00010010" // /* MW 2 */
+ 5825 "00000010" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5827 "10001010" // /* MW 3 */
+ 5828 "00001110" // /* MW 2 */
+ 5829 "00011001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5831 "00111101" // /* MW 7 */
+ 5832 "01010000" // /* MW 6 */
+ 5833 "00010010" // /* MW 5 */
+ 5834 "11100110" // /* MW 4 */
+ 5835 "00100010" // /* MW 3 */
+ 5836 "01001110" // /* MW 2 */
+ 5837 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5839 "10010010" // /* MW 3 */
+ 5840 "00001110" // /* MW 2 */
+ 5841 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5843 "01100110" // /* MW 3 */
+ 5844 "11000000" // /* MW 2 */
+ 5845 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5847 "00111101" // /* MW 7 */
+ 5848 "00110000" // /* MW 6 */
+ 5849 "00010100" // /* MW 5 */
+ 5850 "11100110" // /* MW 4 */
+ 5851 "10010010" // /* MW 3 */
+ 5852 "00010000" // /* MW 2 */
+ 5853 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5855 "10010010" // /* MW 3 */
+ 5856 "00010010" // /* MW 2 */
+ 5857 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 151 136 first
+ 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5859 "00010010" // /* MW 3 */
+ 5860 "00101100" // /* MW 2 */
+ 5861 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 7 "accum.hpp" 151 115
+ 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5863 "00100010" // /* MW 3 */
+ 5864 "11010001" // /* MW 2 */
+ 5865 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5867 "01100110" // /* MW 3 */
+ 5868 "01001000" // /* MW 2 */
+ 5869 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5871 "00111101" // /* MW 7 */
+ 5872 "01100100" // /* MW 6 */
+ 5873 "00010001" // /* MW 5 */
+ 5874 "11100110" // /* MW 4 */
+ 5875 "10010010" // /* MW 3 */
+ 5876 "00010000" // /* MW 2 */
+ 5877 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5879 "10010010" // /* MW 3 */
+ 5880 "00010010" // /* MW 2 */
+ 5881 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22
+ 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5883 "00010010" // /* MW 3 */
+ 5884 "00101000" // /* MW 2 */
+ 5885 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5887 "00011110" // /* MW 3 */
+ 5888 "01000000" // /* MW 2 */
+ 5889 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5891 "00111101" // /* MW 7 */
+ 5892 "01001100" // /* MW 6 */
+ 5893 "00010010" // /* MW 5 */
+ 5894 "11100110" // /* MW 4 */
+ 5895 "00010010" // /* MW 3 */
+ 5896 "00110000" // /* MW 2 */
+ 5897 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5899 "10010010" // /* MW 3 */
+ 5900 "00010100" // /* MW 2 */
+ 5901 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5903 "00111101" // /* MW 7 */
+ 5904 "10001100" // /* MW 6 */
+ 5905 "00010011" // /* MW 5 */
+ 5906 "11000110" // /* MW 4 */
+ 5907 "00011110" // /* MW 3 */
+ 5908 "01000000" // /* MW 2 */
+ 5909 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5911 "10010010" // /* MW 3 */
+ 5912 "00010000" // /* MW 2 */
+ 5913 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5915 "00010010" // /* MW 3 */
+ 5916 "00100100" // /* MW 2 */
+ 5917 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5919 "00111101" // /* MW 7 */
+ 5920 "00110000" // /* MW 6 */
+ 5921 "00010001" // /* MW 5 */
+ 5922 "11000110" // /* MW 4 */
+ 5923 "00011110" // /* MW 3 */
+ 5924 "01000000" // /* MW 2 */
+ 5925 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5927 "10010010" // /* MW 3 */
+ 5928 "00010000" // /* MW 2 */
+ 5929 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5931 "00010010" // /* MW 3 */
+ 5932 "00101000" // /* MW 2 */
+ 5933 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5935 "00111101" // /* MW 7 */
+ 5936 "01010000" // /* MW 6 */
+ 5937 "00010010" // /* MW 5 */
+ 5938 "11000110" // /* MW 4 */
+ 5939 "00000010" // /* MW 3 */
+ 5940 "01000000" // /* MW 2 */
+ 5941 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5943 "10010010" // /* MW 3 */
+ 5944 "00010000" // /* MW 2 */
+ 5945 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5947 "00010010" // /* MW 3 */
+ 5948 "00101100" // /* MW 2 */
+ 5949 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5951 "00000010" // /* MW 3 */
+ 5952 "01000000" // /* MW 2 */
+ 5953 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5955 "00111101" // /* MW 7 */
+ 5956 "01110000" // /* MW 6 */
+ 5957 "00010011" // /* MW 5 */
+ 5958 "11100110" // /* MW 4 */
+ 5959 "00010010" // /* MW 3 */
+ 5960 "00100100" // /* MW 2 */
+ 5961 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5963 "10010010" // /* MW 3 */
+ 5964 "00010000" // /* MW 2 */
+ 5965 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5967 "00111101" // /* MW 7 */
+ 5968 "00110000" // /* MW 6 */
+ 5969 "00010000" // /* MW 5 */
+ 5970 "11000110" // /* MW 4 */
+ 5971 "00000010" // /* MW 3 */
+ 5972 "01010000" // /* MW 2 */
+ 5973 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5975 "10010010" // /* MW 3 */
+ 5976 "00010100" // /* MW 2 */
+ 5977 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5979 "00010010" // /* MW 3 */
+ 5980 "00101000" // /* MW 2 */
+ 5981 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5983 "00000001" // /* MW 3 */
+ 5984 "11100010" // /* MW 2 */
+ 5985 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5987 "00010010" // /* MW 3 */
+ 5988 "00101100" // /* MW 2 */
+ 5989 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1288 16 first
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5991 "00000011" // /* MW 5 */
+ 5992 "01010100" // /* MW 4 */
+ 5993 "10000011" // /* MW 3 */
+ 5994 "11010000" // /* MW 2 */
+ 5995 "11100010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5997 "00010010" // /* MW 3 */
+ 5998 "10100000" // /* MW 2 */
+ 5999 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1287 41 first
+.src_ref 5 "broadcast.hpp" 80 25 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6001 "00000110" // /* MW 5 */
+ 6002 "10110100" // /* MW 4 */
+ 6003 "10001010" // /* MW 3 */
+ 6004 "11010100" // /* MW 2 */
+ 6005 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6007 "10100000" // /* MW 3 */
+ 6008 "11010100" // /* MW 2 */
+ 6009 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6011 "11010001" // /* MW 3 */
+ 6012 "00010000" // /* MW 2 */
+ 6013 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6015 "11110001" // /* MW 3 */
+ 6016 "00010010" // /* MW 2 */
+ 6017 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 1413 19 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "00100010" // /* MW 3 */
+ 6020 "11010011" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "00100010" // /* MW 3 */
+ 6024 "10010011" // /* MW 2 */
+ 6025 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6027 "00100010" // /* MW 3 */
+ 6028 "00010101" // /* MW 2 */
+ 6029 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6031 "00000000" // /* MW 3 */
+ 6032 "01011100" // /* MW 2 */
+ 6033 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6035 "00001000" // /* MW 3 */
+ 6036 "00001100" // /* MW 2 */
+ 6037 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6039 "10101000" // /* MW 3 */
+ 6040 "11000011" // /* MW 2 */
+ 6041 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6043 "10010010" // /* MW 3 */
+ 6044 "00001110" // /* MW 2 */
+ 6045 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6047 "10010010" // /* MW 3 */
+ 6048 "10101100" // /* MW 2 */
+ 6049 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6051 "01110000" // /* MW 7 */
+ 6052 "01001001" // /* MW 6 */
+ 6053 "00000111" // /* MW 5 */
+ 6054 "00000001" // /* MW 4 */
+ 6055 "11000000" // /* MW 3 */
+ 6056 "00000010" // /* MW 2 */
+ 6057 "01101000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6059 "10010010" // /* MW 3 */
+ 6060 "00110010" // /* MW 2 */
+ 6061 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+ 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6063 "10000011" // /* MW 9 */
+ 6064 "01001100" // /* MW 8 */
+ 6065 "00010010" // /* MW 7 */
+ 6066 "00001111" // /* MW 6 */
+ 6067 "11101010" // /* MW 5 */
+ 6068 "11101101" // /* MW 4 */
+ 6069 "11001101" // /* MW 3 */
+ 6070 "10111011" // /* MW 2 */
+ 6071 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "10100001" // /* MW 7 */
+ 6074 "11101100" // /* MW 6 */
+ 6075 "00010001" // /* MW 5 */
+ 6076 "10010001" // /* MW 4 */
+ 6077 "00111110" // /* MW 3 */
+ 6078 "00001011" // /* MW 2 */
+ 6079 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6081 "01100001" // /* MW 9 */
+ 6082 "11101100" // /* MW 8 */
+ 6083 "00010000" // /* MW 7 */
+ 6084 "00101111" // /* MW 6 */
+ 6085 "00001001" // /* MW 5 */
+ 6086 "00110011" // /* MW 4 */
+ 6087 "11100010" // /* MW 3 */
+ 6088 "10100101" // /* MW 2 */
+ 6089 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6091 "00000001" // /* MW 3 */
+ 6092 "11101100" // /* MW 2 */
+ 6093 "00010011" // /* MW 1 */
+ 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6095 "00000000" // /* MW 1 */
+ 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6097 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6099 "00010110" // /* MW 3 */
+ 6100 "11000001" // /* MW 2 */
+ 6101 "00001100" // /* MW 1 */
+ 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6103 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "10000011" // /* MW 3 */
+ 6106 "01010010" // /* MW 2 */
+ 6107 "00010010" // /* MW 1 */
+ 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6109 "00000000" // /* MW 1 */
+ 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6111 "00000000" // /* MW 1 */
+ 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6113 "00000000" // /* MW 1 */
+ 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6115 "00000000" // /* MW 1 */
+ 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6119 "00010110" // /* MW 3 */
+ 6120 "01000001" // /* MW 2 */
+ 6121 "00001100" // /* MW 1 */
+ 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6125 "10100001" // /* MW 3 */
+ 6126 "11110000" // /* MW 2 */
+ 6127 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6129 "01100001" // /* MW 3 */
+ 6130 "11110000" // /* MW 2 */
+ 6131 "00010010" // /* MW 1 */
+ 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6133 "00000000" // /* MW 1 */
+ 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6135 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6137 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6139 "10100001" // /* MW 3 */
+ 6140 "11110010" // /* MW 2 */
+ 6141 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6143 "00010010" // /* MW 3 */
+ 6144 "01110000" // /* MW 2 */
+ 6145 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6147 "00111101" // /* MW 3 */
+ 6148 "10001000" // /* MW 2 */
+ 6149 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6151 "10010010" // /* MW 3 */
+ 6152 "00000101" // /* MW 2 */
+ 6153 "00011100" // /* MW 1 */
+ 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6155 "00000000" // /* MW 1 */
+ 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6157 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6161 "00000001" // /* MW 3 */
+ 6162 "11100001" // /* MW 2 */
+ 6163 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6165 "00010010" // /* MW 3 */
+ 6166 "01110000" // /* MW 2 */
+ 6167 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6169 "00111101" // /* MW 3 */
+ 6170 "10001000" // /* MW 2 */
+ 6171 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6173 "10010010" // /* MW 3 */
+ 6174 "00000001" // /* MW 2 */
+ 6175 "00011100" // /* MW 1 */
+ 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6177 "00000000" // /* MW 1 */
+ 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6179 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6181 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6183 "01100001" // /* MW 3 */
+ 6184 "11110010" // /* MW 2 */
+ 6185 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6187 "00010010" // /* MW 3 */
+ 6188 "01110000" // /* MW 2 */
+ 6189 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6191 "00111101" // /* MW 3 */
+ 6192 "10000100" // /* MW 2 */
+ 6193 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6195 "10010010" // /* MW 3 */
+ 6196 "00000101" // /* MW 2 */
+ 6197 "00011100" // /* MW 1 */
+ 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6199 "00000000" // /* MW 1 */
+ 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6201 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6203 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6205 "00000001" // /* MW 3 */
+ 6206 "11110010" // /* MW 2 */
+ 6207 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6209 "00010010" // /* MW 3 */
+ 6210 "01110000" // /* MW 2 */
+ 6211 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6213 "00111101" // /* MW 3 */
+ 6214 "10000100" // /* MW 2 */
+ 6215 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6217 "10010010" // /* MW 3 */
+ 6218 "00000001" // /* MW 2 */
+ 6219 "00011100" // /* MW 1 */
+ 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6221 "00000000" // /* MW 1 */
+ 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6223 "00000000" // /* MW 1 */
+ 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6225 "00000000" // /* MW 1 */
+ 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6227 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6229 "00010010" // /* MW 3 */
+ 6230 "01110000" // /* MW 2 */
+ 6231 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6233 "00111101" // /* MW 3 */
+ 6234 "10001000" // /* MW 2 */
+ 6235 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6237 "10010010" // /* MW 3 */
+ 6238 "00010101" // /* MW 2 */
+ 6239 "00011100" // /* MW 1 */
+ 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6241 "00000000" // /* MW 1 */
+ 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6243 "00000000" // /* MW 1 */
+ 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6245 "00000000" // /* MW 1 */
+ 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6247 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6249 "00010010" // /* MW 3 */
+ 6250 "01101000" // /* MW 2 */
+ 6251 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6253 "00111101" // /* MW 3 */
+ 6254 "01000100" // /* MW 2 */
+ 6255 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6257 "10010010" // /* MW 3 */
+ 6258 "00010001" // /* MW 2 */
+ 6259 "00011010" // /* MW 1 */
+ 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6261 "00000000" // /* MW 1 */
+ 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6263 "00000000" // /* MW 1 */
+ 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6265 "00000000" // /* MW 1 */
+ 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6267 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "00010010" // /* MW 3 */
+ 6270 "01101000" // /* MW 2 */
+ 6271 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "00111101" // /* MW 3 */
+ 6274 "00100000" // /* MW 2 */
+ 6275 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "10010010" // /* MW 3 */
+ 6278 "00010101" // /* MW 2 */
+ 6279 "00011001" // /* MW 1 */
+ 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6281 "00000000" // /* MW 1 */
+ 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6283 "00000000" // /* MW 1 */
+ 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6285 "00000000" // /* MW 1 */
+ 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6287 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6289 "00010010" // /* MW 3 */
+ 6290 "01100000" // /* MW 2 */
+ 6291 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6293 "00111101" // /* MW 3 */
+ 6294 "00001100" // /* MW 2 */
+ 6295 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6297 "10010010" // /* MW 3 */
+ 6298 "00010001" // /* MW 2 */
+ 6299 "00011000" // /* MW 1 */
+ 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6301 "00000000" // /* MW 1 */
+ 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6303 "00000000" // /* MW 1 */
+ 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6305 "00000000" // /* MW 1 */
+ 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6307 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 72 first
+.src_ref 7 "accum.hpp" 1108 103 first
+ 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6309 "00010110" // /* MW 3 */
+ 6310 "11000000" // /* MW 2 */
+ 6311 "00001101" // /* MW 1 */
+ 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6313 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 41
+ 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6315 "11111110" // /* MW 3 */
+ 6316 "10000101" // /* MW 2 */
+ 6317 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1289 16 first
+ 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6319 "11001100" // /* MW 3 */
+ 6320 "11010101" // /* MW 2 */
+ 6321 "00011101" // /* MW 1 */
+ 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6323 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98 first
+.src_ref 5 "vector.hpp" 1292 26 first
+ 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6325 "01000001" // /* MW 11 */
+ 6326 "01100101" // /* MW 10 */
+ 6327 "10001011" // /* MW 9 */
+ 6328 "00000011" // /* MW 8 */
+ 6329 "00000000" // /* MW 7 */
+ 6330 "00000000" // /* MW 6 */
+ 6331 "00100000" // /* MW 5 */
+ 6332 "00000000" // /* MW 4 */
+ 6333 "11110000" // /* MW 3 */
+ 6334 "00101100" // /* MW 2 */
+ 6335 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first
+.end_of_loop
+ 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6337 "00000000" // /* MW 15 */
+ 6338 "00000000" // /* MW 14 */
+ 6339 "01111000" // /* MW 13 */
+ 6340 "10100101" // /* MW 12 */
+ 6341 "00000001" // /* MW 11 */
+ 6342 "00000000" // /* MW 10 */
+ 6343 "00000000" // /* MW 9 */
+ 6344 "10000000" // /* MW 8 */
+ 6345 "11101010" // /* MW 7 */
+ 6346 "10001010" // /* MW 6 */
+ 6347 "00100111" // /* MW 5 */
+ 6348 "00000000" // /* MW 4 */
+ 6349 "11110000" // /* MW 3 */
+ 6350 "00101100" // /* MW 2 */
+ 6351 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 6353 "00000000" // /* MW 5 */
+ 6354 "00000000" // /* MW 4 */
+ 6355 "01111000" // /* MW 3 */
+ 6356 "00001100" // /* MW 2 */
+ 6357 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6365 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6367 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "10000000" // /* MW 5 */
+ 6370 "10110100" // /* MW 4 */
+ 6371 "10110000" // /* MW 3 */
+ 6372 "10110100" // /* MW 2 */
+ 6373 "11111111" // /* MW 1 */
+ 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6375 "00000000" // /* MW 9 */
+ 6376 "00000000" // /* MW 8 */
+ 6377 "00000000" // /* MW 7 */
+ 6378 "10000000" // /* MW 6 */
+ 6379 "00111101" // /* MW 5 */
+ 6380 "11111000" // /* MW 4 */
+ 6381 "11110111" // /* MW 3 */
+ 6382 "00101100" // /* MW 2 */
+ 6383 "00000000" // /* MW 1 */
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+ 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6385 "10010001" // /* MW 3 */
+ 6386 "01100001" // /* MW 2 */
+ 6387 "00011111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+ 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6389 "11000001" // /* MW 5 */
+ 6390 "01100100" // /* MW 4 */
+ 6391 "01011011" // /* MW 3 */
+ 6392 "10001111" // /* MW 2 */
+ 6393 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6395 "01100000" // /* MW 3 */
+ 6396 "01111011" // /* MW 2 */
+ 6397 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6399 "01110111" // /* MW 3 */
+ 6400 "00000100" // /* MW 2 */
+ 6401 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 57 first
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 6403 "00000001" // /* MW 5 */
+ 6404 "00000000" // /* MW 4 */
+ 6405 "11111000" // /* MW 3 */
+ 6406 "00010011" // /* MW 2 */
+ 6407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6413 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6415 "00000111" // /* MW 3 */
+ 6416 "11000110" // /* MW 2 */
+ 6417 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.delay_slot
+ 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6419 "01100000" // /* MW 13 */
+ 6420 "00101011" // /* MW 12 */
+ 6421 "00000000" // /* MW 11 */
+ 6422 "10101111" // /* MW 10 */
+ 6423 "00110100" // /* MW 9 */
+ 6424 "00000000" // /* MW 8 */
+ 6425 "10110000" // /* MW 7 */
+ 6426 "11000000" // /* MW 6 */
+ 6427 "00100000" // /* MW 5 */
+ 6428 "00000000" // /* MW 4 */
+ 6429 "11110000" // /* MW 3 */
+ 6430 "00101100" // /* MW 2 */
+ 6431 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+.return_address
+ 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6433 "00111001" // /* MW 3 */
+ 6434 "11111000" // /* MW 2 */
+ 6435 "00000111" // /* MW 1 */
+ 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6437 "10011001" // /* MW 3 */
+ 6438 "11111100" // /* MW 2 */
+ 6439 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 23 first
+ 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6441 "01110111" // /* MW 3 */
+ 6442 "01010100" // /* MW 2 */
+ 6443 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4 first
+ 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6445 "00000001" // /* MW 5 */
+ 6446 "00000000" // /* MW 4 */
+ 6447 "00000000" // /* MW 3 */
+ 6448 "11100000" // /* MW 2 */
+ 6449 "11111111" // /* MW 1 */
+ 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6451 "00000000" // /* MW 1 */
+ 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6453 "00000000" // /* MW 1 */
+ 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6455 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+ 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6457 "00000000" // /* MW 3 */
+ 6458 "00101000" // /* MW 2 */
+ 6459 "00010000" // /* MW 1 */
+.delay_slot
+ 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6461 "11000000" // /* MW 3 */
+ 6462 "01100010" // /* MW 2 */
+ 6463 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6471 "01111110" // /* MW 9 */
+ 6472 "10100101" // /* MW 8 */
+ 6473 "00000001" // /* MW 7 */
+ 6474 "00000000" // /* MW 6 */
+ 6475 "00010000" // /* MW 5 */
+ 6476 "00000000" // /* MW 4 */
+ 6477 "11110000" // /* MW 3 */
+ 6478 "00101100" // /* MW 2 */
+ 6479 "00000000" // /* MW 1 */
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 5 "blend.hpp" 163 48
+ 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 6481 "00100000" // /* MW 9 */
+ 6482 "00000000" // /* MW 8 */
+ 6483 "00000000" // /* MW 7 */
+ 6484 "10111000" // /* MW 6 */
+ 6485 "00000010" // /* MW 5 */
+ 6486 "00000000" // /* MW 4 */
+ 6487 "00000000" // /* MW 3 */
+ 6488 "11110100" // /* MW 2 */
+ 6489 "00011111" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6491 "00000001" // /* MW 3 */
+ 6492 "00101010" // /* MW 2 */
+ 6493 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6501 "10000001" // /* MW 11 */
+ 6502 "10101101" // /* MW 10 */
+ 6503 "00000000" // /* MW 9 */
+ 6504 "00000000" // /* MW 8 */
+ 6505 "00000000" // /* MW 7 */
+ 6506 "00000000" // /* MW 6 */
+ 6507 "00100000" // /* MW 5 */
+ 6508 "00000000" // /* MW 4 */
+ 6509 "11110000" // /* MW 3 */
+ 6510 "00101100" // /* MW 2 */
+ 6511 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6513 "00010101" // /* MW 3 */
+ 6514 "00001010" // /* MW 2 */
+ 6515 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6517 "01100111" // /* MW 3 */
+ 6518 "01001010" // /* MW 2 */
+ 6519 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */
+ 6521 "00000001" // /* MW 5 */
+ 6522 "01000000" // /* MW 4 */
+ 6523 "00110000" // /* MW 3 */
+ 6524 "00001110" // /* MW 2 */
+ 6525 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6535 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6537 "00011001" // /* MW 3 */
+ 6538 "00001110" // /* MW 2 */
+ 6539 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6541 "01100111" // /* MW 3 */
+ 6542 "11001110" // /* MW 2 */
+ 6543 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */
+ 6545 "00000001" // /* MW 5 */
+ 6546 "01000000" // /* MW 4 */
+ 6547 "10101000" // /* MW 3 */
+ 6548 "00001110" // /* MW 2 */
+ 6549 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6551 "01000001" // /* MW 3 */
+ 6552 "00001010" // /* MW 2 */
+ 6553 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6555 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6557 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6559 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6561 "00000000" // /* MW 15 */
+ 6562 "00000000" // /* MW 14 */
+ 6563 "01111000" // /* MW 13 */
+ 6564 "10100101" // /* MW 12 */
+ 6565 "00000001" // /* MW 11 */
+ 6566 "00000000" // /* MW 10 */
+ 6567 "00000000" // /* MW 9 */
+ 6568 "00000000" // /* MW 8 */
+ 6569 "01011011" // /* MW 7 */
+ 6570 "00000001" // /* MW 6 */
+ 6571 "00100000" // /* MW 5 */
+ 6572 "00000000" // /* MW 4 */
+ 6573 "11110000" // /* MW 3 */
+ 6574 "00101100" // /* MW 2 */
+ 6575 "00000000" // /* MW 1 */
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30
+ 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6577 "01111000" // /* MW 9 */
+ 6578 "11110000" // /* MW 8 */
+ 6579 "01100000" // /* MW 7 */
+ 6580 "11101010" // /* MW 6 */
+ 6581 "00010000" // /* MW 5 */
+ 6582 "00000001" // /* MW 4 */
+ 6583 "01010000" // /* MW 3 */
+ 6584 "00011110" // /* MW 2 */
+ 6585 "01001000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first
+ 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6587 "00000101" // /* MW 5 */
+ 6588 "10100100" // /* MW 4 */
+ 6589 "00011000" // /* MW 3 */
+ 6590 "10001101" // /* MW 2 */
+ 6591 "10001001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 202 12
+ 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */
+ 6593 "00000001" // /* MW 5 */
+ 6594 "01000000" // /* MW 4 */
+ 6595 "00100000" // /* MW 3 */
+ 6596 "00001110" // /* MW 2 */
+ 6597 "00110000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6605 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first
+.delay_slot
+ 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6607 "01011110" // /* MW 3 */
+ 6608 "11001010" // /* MW 2 */
+ 6609 "00010001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22
+ 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6611 "00010000" // /* MW 11 */
+ 6612 "00001000" // /* MW 10 */
+ 6613 "01111101" // /* MW 9 */
+ 6614 "00000100" // /* MW 8 */
+ 6615 "00000000" // /* MW 7 */
+ 6616 "00000000" // /* MW 6 */
+ 6617 "10001011" // /* MW 5 */
+ 6618 "10000100" // /* MW 4 */
+ 6619 "10000000" // /* MW 3 */
+ 6620 "10001010" // /* MW 2 */
+ 6621 "00000100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+ 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6623 "00010000" // /* MW 11 */
+ 6624 "00111000" // /* MW 10 */
+ 6625 "10111101" // /* MW 9 */
+ 6626 "00000101" // /* MW 8 */
+ 6627 "00000000" // /* MW 7 */
+ 6628 "10000000" // /* MW 6 */
+ 6629 "10100101" // /* MW 5 */
+ 6630 "11111101" // /* MW 4 */
+ 6631 "11010111" // /* MW 3 */
+ 6632 "00011110" // /* MW 2 */
+ 6633 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+ 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6635 "00010101" // /* MW 3 */
+ 6636 "00011101" // /* MW 2 */
+ 6637 "00000000" // /* MW 1 */
+ 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6639 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+ 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6641 "10010010" // /* MW 3 */
+ 6642 "11000010" // /* MW 2 */
+ 6643 "00011100" // /* MW 1 */
+ 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6645 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 5 "add.hpp" 28 49 first
+ 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6647 "00111101" // /* MW 7 */
+ 6648 "01101000" // /* MW 6 */
+ 6649 "00010001" // /* MW 5 */
+ 6650 "11100110" // /* MW 4 */
+ 6651 "00010010" // /* MW 3 */
+ 6652 "00010011" // /* MW 2 */
+ 6653 "00000011" // /* MW 1 */
+ 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6655 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+ 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6657 "00000000" // /* MW 15 */
+ 6658 "00000000" // /* MW 14 */
+ 6659 "11001000" // /* MW 13 */
+ 6660 "11111111" // /* MW 12 */
+ 6661 "10111001" // /* MW 11 */
+ 6662 "00000010" // /* MW 10 */
+ 6663 "00000000" // /* MW 9 */
+ 6664 "00000000" // /* MW 8 */
+ 6665 "01011011" // /* MW 7 */
+ 6666 "00000001" // /* MW 6 */
+ 6667 "00100000" // /* MW 5 */
+ 6668 "00000000" // /* MW 4 */
+ 6669 "11110000" // /* MW 3 */
+ 6670 "00101100" // /* MW 2 */
+ 6671 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+.begin_of_loop
+.loop_nesting 1
+ 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6673 "00000000" // /* MW 15 */
+ 6674 "00000000" // /* MW 14 */
+ 6675 "01111000" // /* MW 13 */
+ 6676 "10100101" // /* MW 12 */
+ 6677 "00000001" // /* MW 11 */
+ 6678 "00000000" // /* MW 10 */
+ 6679 "00000000" // /* MW 9 */
+ 6680 "00000000" // /* MW 8 */
+ 6681 "01011011" // /* MW 7 */
+ 6682 "00000001" // /* MW 6 */
+ 6683 "00100000" // /* MW 5 */
+ 6684 "00000000" // /* MW 4 */
+ 6685 "10110000" // /* MW 3 */
+ 6686 "10100010" // /* MW 2 */
+ 6687 "00000011" // /* MW 1 */
+ 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6689 "00000000" // /* MW 15 */
+ 6690 "00000000" // /* MW 14 */
+ 6691 "01111000" // /* MW 13 */
+ 6692 "10100101" // /* MW 12 */
+ 6693 "00000001" // /* MW 11 */
+ 6694 "00000000" // /* MW 10 */
+ 6695 "00000000" // /* MW 9 */
+ 6696 "00000000" // /* MW 8 */
+ 6697 "01011011" // /* MW 7 */
+ 6698 "00000001" // /* MW 6 */
+ 6699 "00100000" // /* MW 5 */
+ 6700 "00000000" // /* MW 4 */
+ 6701 "11110000" // /* MW 3 */
+ 6702 "00101100" // /* MW 2 */
+ 6703 "00000000" // /* MW 1 */
+ 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6705 "00000000" // /* MW 15 */
+ 6706 "00000000" // /* MW 14 */
+ 6707 "01111000" // /* MW 13 */
+ 6708 "10100101" // /* MW 12 */
+ 6709 "00000001" // /* MW 11 */
+ 6710 "00000000" // /* MW 10 */
+ 6711 "00000000" // /* MW 9 */
+ 6712 "00000000" // /* MW 8 */
+ 6713 "01011011" // /* MW 7 */
+ 6714 "00000001" // /* MW 6 */
+ 6715 "00100000" // /* MW 5 */
+ 6716 "00000000" // /* MW 4 */
+ 6717 "11110000" // /* MW 3 */
+ 6718 "00101100" // /* MW 2 */
+ 6719 "00000000" // /* MW 1 */
+ 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6721 "00000000" // /* MW 15 */
+ 6722 "00000000" // /* MW 14 */
+ 6723 "01111000" // /* MW 13 */
+ 6724 "10100101" // /* MW 12 */
+ 6725 "00000001" // /* MW 11 */
+ 6726 "00000000" // /* MW 10 */
+ 6727 "00000000" // /* MW 9 */
+ 6728 "00000000" // /* MW 8 */
+ 6729 "01011011" // /* MW 7 */
+ 6730 "00000001" // /* MW 6 */
+ 6731 "00100000" // /* MW 5 */
+ 6732 "00000000" // /* MW 4 */
+ 6733 "11110000" // /* MW 3 */
+ 6734 "00101100" // /* MW 2 */
+ 6735 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6737 "00000000" // /* MW 15 */
+ 6738 "00000000" // /* MW 14 */
+ 6739 "01111000" // /* MW 13 */
+ 6740 "00001001" // /* MW 12 */
+ 6741 "01100010" // /* MW 11 */
+ 6742 "00000010" // /* MW 10 */
+ 6743 "00000000" // /* MW 9 */
+ 6744 "00000000" // /* MW 8 */
+ 6745 "01011011" // /* MW 7 */
+ 6746 "00000001" // /* MW 6 */
+ 6747 "00100000" // /* MW 5 */
+ 6748 "00000000" // /* MW 4 */
+ 6749 "11110000" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "01000001" // /* MW 15 */
+ 6754 "10001011" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "01011011" // /* MW 7 */
+ 6762 "00000001" // /* MW 6 */
+ 6763 "00100000" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.src_ref 7 "accum.hpp" 199 120 first
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6769 "00000000" // /* MW 15 */
+ 6770 "00000000" // /* MW 14 */
+ 6771 "01111000" // /* MW 13 */
+ 6772 "10001001" // /* MW 12 */
+ 6773 "10001001" // /* MW 11 */
+ 6774 "00000001" // /* MW 10 */
+ 6775 "00000000" // /* MW 9 */
+ 6776 "00000000" // /* MW 8 */
+ 6777 "01011011" // /* MW 7 */
+ 6778 "00000001" // /* MW 6 */
+ 6779 "00100000" // /* MW 5 */
+ 6780 "00000000" // /* MW 4 */
+ 6781 "11110000" // /* MW 3 */
+ 6782 "00101100" // /* MW 2 */
+ 6783 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+.loop_nesting 0
+ 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6785 "00010000" // /* MW 9 */
+ 6786 "01111000" // /* MW 8 */
+ 6787 "10110010" // /* MW 7 */
+ 6788 "11110011" // /* MW 6 */
+ 6789 "00000001" // /* MW 5 */
+ 6790 "00000000" // /* MW 4 */
+ 6791 "00000000" // /* MW 3 */
+ 6792 "00010000" // /* MW 2 */
+ 6793 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6795 "01011000" // /* MW 9 */
+ 6796 "00000001" // /* MW 8 */
+ 6797 "10011000" // /* MW 7 */
+ 6798 "00001000" // /* MW 6 */
+ 6799 "01100001" // /* MW 5 */
+ 6800 "00000000" // /* MW 4 */
+ 6801 "01010000" // /* MW 3 */
+ 6802 "10010000" // /* MW 2 */
+ 6803 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6805 "00000101" // /* MW 3 */
+ 6806 "00100010" // /* MW 2 */
+ 6807 "00010000" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6809 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6811 "00010010" // /* MW 3 */
+ 6812 "11000100" // /* MW 2 */
+ 6813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6817 "00010010" // /* MW 3 */
+ 6818 "00110011" // /* MW 2 */
+ 6819 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6821 "00010010" // /* MW 3 */
+ 6822 "00010000" // /* MW 2 */
+ 6823 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 1108 103
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6825 "00111101" // /* MW 9 */
+ 6826 "01000000" // /* MW 8 */
+ 6827 "00010000" // /* MW 7 */
+ 6828 "00101111" // /* MW 6 */
+ 6829 "01001001" // /* MW 5 */
+ 6830 "00000000" // /* MW 4 */
+ 6831 "10000000" // /* MW 3 */
+ 6832 "00111010" // /* MW 2 */
+ 6833 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6835 "00010010" // /* MW 3 */
+ 6836 "00010011" // /* MW 2 */
+ 6837 "00011010" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+ 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6839 "01110010" // /* MW 3 */
+ 6840 "00010110" // /* MW 2 */
+ 6841 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+ 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6843 "10010010" // /* MW 3 */
+ 6844 "00000100" // /* MW 2 */
+ 6845 "00011001" // /* MW 1 */
+ 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6847 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+ 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "00010010" // /* MW 3 */
+ 6850 "00000100" // /* MW 2 */
+ 6851 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "00010010" // /* MW 3 */
+ 6854 "00100000" // /* MW 2 */
+ 6855 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6857 "00111101" // /* MW 7 */
+ 6858 "00001100" // /* MW 6 */
+ 6859 "00010000" // /* MW 5 */
+ 6860 "11000110" // /* MW 4 */
+ 6861 "01000010" // /* MW 3 */
+ 6862 "00010000" // /* MW 2 */
+ 6863 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6865 "10010010" // /* MW 3 */
+ 6866 "00000100" // /* MW 2 */
+ 6867 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6869 "10010010" // /* MW 3 */
+ 6870 "00100000" // /* MW 2 */
+ 6871 "00011001" // /* MW 1 */
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6873 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+ 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6875 "10010110" // /* MW 3 */
+ 6876 "01000000" // /* MW 2 */
+ 6877 "00001000" // /* MW 1 */
+ 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6879 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6881 "10000011" // /* MW 7 */
+ 6882 "01000000" // /* MW 6 */
+ 6883 "00010100" // /* MW 5 */
+ 6884 "11100110" // /* MW 4 */
+ 6885 "00010010" // /* MW 3 */
+ 6886 "10100000" // /* MW 2 */
+ 6887 "00000001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6889 "00111101" // /* MW 7 */
+ 6890 "00001000" // /* MW 6 */
+ 6891 "00010000" // /* MW 5 */
+ 6892 "11000110" // /* MW 4 */
+ 6893 "00011010" // /* MW 3 */
+ 6894 "10011000" // /* MW 2 */
+ 6895 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6897 "10010010" // /* MW 3 */
+ 6898 "00000110" // /* MW 2 */
+ 6899 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6901 "10010010" // /* MW 3 */
+ 6902 "10100100" // /* MW 2 */
+ 6903 "00011001" // /* MW 1 */
+ 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6907 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6909 "00010110" // /* MW 3 */
+ 6910 "01000010" // /* MW 2 */
+ 6911 "00001001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6913 "00010010" // /* MW 3 */
+ 6914 "10100000" // /* MW 2 */
+ 6915 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6917 "00111101" // /* MW 7 */
+ 6918 "00001000" // /* MW 6 */
+ 6919 "00010000" // /* MW 5 */
+ 6920 "11000110" // /* MW 4 */
+ 6921 "00000010" // /* MW 3 */
+ 6922 "00101000" // /* MW 2 */
+ 6923 "00000011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10010010" // /* MW 3 */
+ 6926 "00001100" // /* MW 2 */
+ 6927 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "10010010" // /* MW 3 */
+ 6930 "10100110" // /* MW 2 */
+ 6931 "00011010" // /* MW 1 */
+ 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6933 "00000000" // /* MW 1 */
+ 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6935 "00000000" // /* MW 1 */
+ 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6937 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6939 "00010010" // /* MW 3 */
+ 6940 "00100000" // /* MW 2 */
+ 6941 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6943 "00000001" // /* MW 3 */
+ 6944 "00011010" // /* MW 2 */
+ 6945 "00011000" // /* MW 1 */
+ 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6947 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 856 23 first
+ 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6949 "00010001" // /* MW 3 */
+ 6950 "00000000" // /* MW 2 */
+ 6951 "00011011" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6953 "00001000" // /* MW 3 */
+ 6954 "10001011" // /* MW 2 */
+ 6955 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6957 "10010010" // /* MW 3 */
+ 6958 "00000010" // /* MW 2 */
+ 6959 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6961 "10010010" // /* MW 3 */
+ 6962 "10101010" // /* MW 2 */
+ 6963 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6965 "01110000" // /* MW 7 */
+ 6966 "01001001" // /* MW 6 */
+ 6967 "10010001" // /* MW 5 */
+ 6968 "00000001" // /* MW 4 */
+ 6969 "11000000" // /* MW 3 */
+ 6970 "00100010" // /* MW 2 */
+ 6971 "01011000" // /* MW 1 */
+ 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6973 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6975 "10000011" // /* MW 3 */
+ 6976 "01001010" // /* MW 2 */
+ 6977 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+ 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6979 "01000001" // /* MW 3 */
+ 6980 "11101010" // /* MW 2 */
+ 6981 "00010000" // /* MW 1 */
+ 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6983 "00000000" // /* MW 1 */
+ 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6985 "00000000" // /* MW 1 */
+ 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6987 "00000000" // /* MW 1 */
+ 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6989 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6991 "10010110" // /* MW 3 */
+ 6992 "11000000" // /* MW 2 */
+ 6993 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6995 "10000011" // /* MW 3 */
+ 6996 "10000100" // /* MW 2 */
+ 6997 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6999 "10000011" // /* MW 3 */
+ 7000 "00100010" // /* MW 2 */
+ 7001 "00010011" // /* MW 1 */
+ 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7003 "00000000" // /* MW 1 */
+ 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7005 "00000000" // /* MW 1 */
+ 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7007 "00000000" // /* MW 1 */
+ 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7009 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7011 "00010110" // /* MW 3 */
+ 7012 "11000010" // /* MW 2 */
+ 7013 "00001001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7015 "10010110" // /* MW 3 */
+ 7016 "01000001" // /* MW 2 */
+ 7017 "00001011" // /* MW 1 */
+ 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7019 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7021 "01100001" // /* MW 3 */
+ 7022 "11101100" // /* MW 2 */
+ 7023 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7025 "01000001" // /* MW 3 */
+ 7026 "11101100" // /* MW 2 */
+ 7027 "00010011" // /* MW 1 */
+ 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7029 "00000000" // /* MW 1 */
+ 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7031 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7033 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7035 "01100001" // /* MW 3 */
+ 7036 "11100010" // /* MW 2 */
+ 7037 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "00010010" // /* MW 3 */
+ 7040 "01101000" // /* MW 2 */
+ 7041 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7043 "00111101" // /* MW 3 */
+ 7044 "01001100" // /* MW 2 */
+ 7045 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7047 "10010010" // /* MW 3 */
+ 7048 "00000101" // /* MW 2 */
+ 7049 "00011010" // /* MW 1 */
+ 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7051 "00000000" // /* MW 1 */
+ 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7053 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7055 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7057 "01100001" // /* MW 3 */
+ 7058 "11101010" // /* MW 2 */
+ 7059 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00010010" // /* MW 3 */
+ 7062 "01101000" // /* MW 2 */
+ 7063 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7065 "00111101" // /* MW 3 */
+ 7066 "01001100" // /* MW 2 */
+ 7067 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7069 "10010010" // /* MW 3 */
+ 7070 "00000001" // /* MW 2 */
+ 7071 "00011010" // /* MW 1 */
+ 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7073 "00000000" // /* MW 1 */
+ 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7075 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7077 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7079 "01000001" // /* MW 3 */
+ 7080 "11100010" // /* MW 2 */
+ 7081 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7083 "00010010" // /* MW 3 */
+ 7084 "01101000" // /* MW 2 */
+ 7085 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7087 "00111101" // /* MW 3 */
+ 7088 "01001100" // /* MW 2 */
+ 7089 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7091 "10010010" // /* MW 3 */
+ 7092 "00000101" // /* MW 2 */
+ 7093 "00011010" // /* MW 1 */
+ 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7095 "00000000" // /* MW 1 */
+ 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7097 "00000000" // /* MW 1 */
+ 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7099 "00000000" // /* MW 1 */
+ 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7101 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7103 "00010010" // /* MW 3 */
+ 7104 "01101000" // /* MW 2 */
+ 7105 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7107 "00111101" // /* MW 3 */
+ 7108 "01001100" // /* MW 2 */
+ 7109 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7111 "10010010" // /* MW 3 */
+ 7112 "00000001" // /* MW 2 */
+ 7113 "00011010" // /* MW 1 */
+ 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7115 "00000000" // /* MW 1 */
+ 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+ 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "11000001" // /* MW 3 */
+ 7120 "11100000" // /* MW 2 */
+ 7121 "00010011" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7125 "00010010" // /* MW 3 */
+ 7126 "01101000" // /* MW 2 */
+ 7127 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7129 "00111101" // /* MW 3 */
+ 7130 "01001100" // /* MW 2 */
+ 7131 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7133 "10010010" // /* MW 3 */
+ 7134 "00000101" // /* MW 2 */
+ 7135 "00011010" // /* MW 1 */
+ 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7137 "00000000" // /* MW 1 */
+ 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7139 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+ 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7141 "00000001" // /* MW 3 */
+ 7142 "11100010" // /* MW 2 */
+ 7143 "00010001" // /* MW 1 */
+ 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7145 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00010010" // /* MW 3 */
+ 7148 "01101100" // /* MW 2 */
+ 7149 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "00111101" // /* MW 3 */
+ 7152 "01000100" // /* MW 2 */
+ 7153 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "10010010" // /* MW 3 */
+ 7156 "00000001" // /* MW 2 */
+ 7157 "00011010" // /* MW 1 */
+ 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7159 "00000000" // /* MW 1 */
+ 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7161 "00000000" // /* MW 1 */
+ 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7163 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7165 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7167 "00111101" // /* MW 7 */
+ 7168 "01000000" // /* MW 6 */
+ 7169 "00010000" // /* MW 5 */
+ 7170 "11100110" // /* MW 4 */
+ 7171 "00010010" // /* MW 3 */
+ 7172 "00100100" // /* MW 2 */
+ 7173 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7175 "00000001" // /* MW 7 */
+ 7176 "11101010" // /* MW 6 */
+ 7177 "00010100" // /* MW 5 */
+ 7178 "11100110" // /* MW 4 */
+ 7179 "10010010" // /* MW 3 */
+ 7180 "00000000" // /* MW 2 */
+ 7181 "00000010" // /* MW 1 */
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00111101" // /* MW 3 */
+ 7190 "01010000" // /* MW 2 */
+ 7191 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010010" // /* MW 3 */
+ 7194 "00000000" // /* MW 2 */
+ 7195 "00011010" // /* MW 1 */
+ 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7197 "00000000" // /* MW 1 */
+ 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 7199 "00000000" // /* MW 5 */
+ 7200 "00000000" // /* MW 4 */
+ 7201 "01111000" // /* MW 3 */
+ 7202 "00001100" // /* MW 2 */
+ 7203 "00000000" // /* MW 1 */
+.delay_slot
+ 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7205 "01100101" // /* MW 3 */
+ 7206 "11111010" // /* MW 2 */
+ 7207 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7209 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.delay_slot
+ 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7211 "00010010" // /* MW 3 */
+ 7212 "00000000" // /* MW 2 */
+ 7213 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7215 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 1108 103 first
+.delay_slot
+ 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7217 "00000000" // /* MW 15 */
+ 7218 "00000000" // /* MW 14 */
+ 7219 "01111000" // /* MW 13 */
+ 7220 "10100101" // /* MW 12 */
+ 7221 "00000001" // /* MW 11 */
+ 7222 "00000000" // /* MW 10 */
+ 7223 "00000000" // /* MW 9 */
+ 7224 "10000000" // /* MW 8 */
+ 7225 "00010010" // /* MW 7 */
+ 7226 "00000101" // /* MW 6 */
+ 7227 "00100001" // /* MW 5 */
+ 7228 "00000000" // /* MW 4 */
+ 7229 "11110000" // /* MW 3 */
+ 7230 "00101100" // /* MW 2 */
+ 7231 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+.src_ref 5 "blend.hpp" 163 48
+ 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */
+ 7233 "00100000" // /* MW 9 */
+ 7234 "00000000" // /* MW 8 */
+ 7235 "00000000" // /* MW 7 */
+ 7236 "10111110" // /* MW 6 */
+ 7237 "00000010" // /* MW 5 */
+ 7238 "00000000" // /* MW 4 */
+ 7239 "00000000" // /* MW 3 */
+ 7240 "00010100" // /* MW 2 */
+ 7241 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7243 "00000001" // /* MW 3 */
+ 7244 "00101010" // /* MW 2 */
+ 7245 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7247 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7253 "10000001" // /* MW 11 */
+ 7254 "10101101" // /* MW 10 */
+ 7255 "00000000" // /* MW 9 */
+ 7256 "00000000" // /* MW 8 */
+ 7257 "00000000" // /* MW 7 */
+ 7258 "00000000" // /* MW 6 */
+ 7259 "00100000" // /* MW 5 */
+ 7260 "00000000" // /* MW 4 */
+ 7261 "11110000" // /* MW 3 */
+ 7262 "00101100" // /* MW 2 */
+ 7263 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7265 "00000000" // /* MW 5 */
+ 7266 "00000000" // /* MW 4 */
+ 7267 "10010000" // /* MW 3 */
+ 7268 "00001110" // /* MW 2 */
+ 7269 "00000000" // /* MW 1 */
+.delay_slot
+ 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7271 "01110000" // /* MW 7 */
+ 7272 "11110000" // /* MW 6 */
+ 7273 "01100000" // /* MW 5 */
+ 7274 "00000010" // /* MW 4 */
+ 7275 "10110000" // /* MW 3 */
+ 7276 "10010011" // /* MW 2 */
+ 7277 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7279 "00000000" // /* MW 1 */
+.delay_slot
+ 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7281 "00110011" // /* MW 3 */
+ 7282 "11110000" // /* MW 2 */
+ 7283 "00001111" // /* MW 1 */
+.delay_slot
+ 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7285 "00110011" // /* MW 3 */
+ 7286 "11110101" // /* MW 2 */
+ 7287 "00001111" // /* MW 1 */
+.delay_slot
+ 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7289 "01110000" // /* MW 7 */
+ 7290 "10100101" // /* MW 6 */
+ 7291 "00000001" // /* MW 5 */
+ 7292 "00000000" // /* MW 4 */
+ 7293 "01100000" // /* MW 3 */
+ 7294 "00001110" // /* MW 2 */
+ 7295 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7297 "00000101" // /* MW 3 */
+ 7298 "00100010" // /* MW 2 */
+ 7299 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7301 "01100111" // /* MW 3 */
+ 7302 "01100010" // /* MW 2 */
+ 7303 "00010100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */
+ 7305 "00000001" // /* MW 5 */
+ 7306 "01000000" // /* MW 4 */
+ 7307 "10010000" // /* MW 3 */
+ 7308 "00001110" // /* MW 2 */
+ 7309 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+.delay_slot
+ 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7313 "00110011" // /* MW 3 */
+ 7314 "11110000" // /* MW 2 */
+ 7315 "00001111" // /* MW 1 */
+.delay_slot
+ 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7317 "00110011" // /* MW 3 */
+ 7318 "11110101" // /* MW 2 */
+ 7319 "00001111" // /* MW 1 */
+.delay_slot
+ 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "01110011" // /* MW 3 */
+ 7322 "11111000" // /* MW 2 */
+ 7323 "00001111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7325 "01111001" // /* MW 9 */
+ 7326 "11110000" // /* MW 8 */
+ 7327 "01100000" // /* MW 7 */
+ 7328 "01001010" // /* MW 6 */
+ 7329 "01110000" // /* MW 5 */
+ 7330 "00000000" // /* MW 4 */
+ 7331 "10110000" // /* MW 3 */
+ 7332 "10010011" // /* MW 2 */
+ 7333 "11111111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7335 "01100111" // /* MW 3 */
+ 7336 "11001110" // /* MW 2 */
+ 7337 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7339 "00000001" // /* MW 5 */
+ 7340 "01000000" // /* MW 4 */
+ 7341 "10000000" // /* MW 3 */
+ 7342 "00001110" // /* MW 2 */
+ 7343 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7345 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7347 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7349 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7351 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7353 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7355 "01100111" // /* MW 3 */
+ 7356 "01001110" // /* MW 2 */
+ 7357 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */
+ 7359 "00000001" // /* MW 5 */
+ 7360 "01000000" // /* MW 4 */
+ 7361 "01110000" // /* MW 3 */
+ 7362 "00001110" // /* MW 2 */
+ 7363 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7365 "01000001" // /* MW 3 */
+ 7366 "00001010" // /* MW 2 */
+ 7367 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7369 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7371 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7375 "00000000" // /* MW 1 */
+ 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */
+ 7377 "00000000" // /* MW 5 */
+ 7378 "00000000" // /* MW 4 */
+ 7379 "11011000" // /* MW 3 */
+ 7380 "00001100" // /* MW 2 */
+ 7381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7391 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+.src_ref 5 "blend.hpp" 170 36
+ 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7393 "00100000" // /* MW 9 */
+ 7394 "00000000" // /* MW 8 */
+ 7395 "00000000" // /* MW 7 */
+ 7396 "10111000" // /* MW 6 */
+ 7397 "00000010" // /* MW 5 */
+ 7398 "00000000" // /* MW 4 */
+ 7399 "00000000" // /* MW 3 */
+ 7400 "00110001" // /* MW 2 */
+ 7401 "00100000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7403 "01000001" // /* MW 5 */
+ 7404 "00000000" // /* MW 4 */
+ 7405 "00101000" // /* MW 3 */
+ 7406 "01000000" // /* MW 2 */
+ 7407 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00000001" // /* MW 3 */
+ 7410 "00101000" // /* MW 2 */
+ 7411 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7417 "00011100" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00000000" // /* MW 5 */
+ 7420 "00000100" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7425 "00000000" // /* MW 5 */
+ 7426 "00000000" // /* MW 4 */
+ 7427 "10101000" // /* MW 3 */
+ 7428 "00001100" // /* MW 2 */
+ 7429 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7431 "11111110" // /* MW 5 */
+ 7432 "10111111" // /* MW 4 */
+ 7433 "11111000" // /* MW 3 */
+ 7434 "00000000" // /* MW 2 */
+ 7435 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7437 "00100000" // /* MW 3 */
+ 7438 "00000000" // /* MW 2 */
+ 7439 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7445 "10000001" // /* MW 11 */
+ 7446 "10101101" // /* MW 10 */
+ 7447 "00000000" // /* MW 9 */
+ 7448 "00000000" // /* MW 8 */
+ 7449 "00000000" // /* MW 7 */
+ 7450 "00000000" // /* MW 6 */
+ 7451 "00100000" // /* MW 5 */
+ 7452 "00000000" // /* MW 4 */
+ 7453 "11110000" // /* MW 3 */
+ 7454 "00101100" // /* MW 2 */
+ 7455 "00000000" // /* MW 1 */
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7457 "00100000" // /* MW 9 */
+ 7458 "00000000" // /* MW 8 */
+ 7459 "00000000" // /* MW 7 */
+ 7460 "10111000" // /* MW 6 */
+ 7461 "00000010" // /* MW 5 */
+ 7462 "00000000" // /* MW 4 */
+ 7463 "01110000" // /* MW 3 */
+ 7464 "00000111" // /* MW 2 */
+ 7465 "11111110" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7467 "01011000" // /* MW 9 */
+ 7468 "00000000" // /* MW 8 */
+ 7469 "10001000" // /* MW 7 */
+ 7470 "10001010" // /* MW 6 */
+ 7471 "00000000" // /* MW 5 */
+ 7472 "00000000" // /* MW 4 */
+ 7473 "01110000" // /* MW 3 */
+ 7474 "10100111" // /* MW 2 */
+ 7475 "11111110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7477 "10010000" // /* MW 9 */
+ 7478 "11111111" // /* MW 8 */
+ 7479 "00001111" // /* MW 7 */
+ 7480 "00111110" // /* MW 6 */
+ 7481 "00000000" // /* MW 5 */
+ 7482 "00000000" // /* MW 4 */
+ 7483 "00100000" // /* MW 3 */
+ 7484 "10010011" // /* MW 2 */
+ 7485 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7487 "10000001" // /* MW 5 */
+ 7488 "00000000" // /* MW 4 */
+ 7489 "00101000" // /* MW 3 */
+ 7490 "01000000" // /* MW 2 */
+ 7491 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7493 "00000101" // /* MW 3 */
+ 7494 "00100010" // /* MW 2 */
+ 7495 "00010001" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7497 "00011100" // /* MW 7 */
+ 7498 "00000000" // /* MW 6 */
+ 7499 "00000000" // /* MW 5 */
+ 7500 "00000100" // /* MW 4 */
+ 7501 "01110000" // /* MW 3 */
+ 7502 "00001111" // /* MW 2 */
+ 7503 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7505 "00000000" // /* MW 5 */
+ 7506 "00000000" // /* MW 4 */
+ 7507 "10101000" // /* MW 3 */
+ 7508 "00001100" // /* MW 2 */
+ 7509 "00000000" // /* MW 1 */
+.delay_slot
+ 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7511 "11100000" // /* MW 3 */
+ 7512 "11000001" // /* MW 2 */
+ 7513 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7515 "11111110" // /* MW 5 */
+ 7516 "10111111" // /* MW 4 */
+ 7517 "11111000" // /* MW 3 */
+ 7518 "00000000" // /* MW 2 */
+ 7519 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7521 "00100000" // /* MW 3 */
+ 7522 "00000000" // /* MW 2 */
+ 7523 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+ 7527 "00000000" // /* MW 1 */
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 472
+.src_ref 8 "superkernels.cpp" 472 first
+.function_start
+ 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7537 "00000001" // /* MW 5 */
+ 7538 "00000000" // /* MW 4 */
+ 7539 "00000000" // /* MW 3 */
+ 7540 "00010000" // /* MW 2 */
+ 7541 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7543 "00010001" // /* MW 9 */
+ 7544 "01100000" // /* MW 8 */
+ 7545 "10110010" // /* MW 7 */
+ 7546 "11110011" // /* MW 6 */
+ 7547 "00000001" // /* MW 5 */
+ 7548 "00000000" // /* MW 4 */
+ 7549 "10110000" // /* MW 3 */
+ 7550 "11110011" // /* MW 2 */
+ 7551 "11111101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7553 "01110010" // /* MW 9 */
+ 7554 "01110000" // /* MW 8 */
+ 7555 "00001101" // /* MW 7 */
+ 7556 "10000010" // /* MW 6 */
+ 7557 "00011101" // /* MW 5 */
+ 7558 "11100111" // /* MW 4 */
+ 7559 "11010111" // /* MW 3 */
+ 7560 "11000010" // /* MW 2 */
+ 7561 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 22 first
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7563 "01111001" // /* MW 9 */
+ 7564 "11110000" // /* MW 8 */
+ 7565 "01101000" // /* MW 7 */
+ 7566 "10000001" // /* MW 6 */
+ 7567 "00000100" // /* MW 5 */
+ 7568 "00100001" // /* MW 4 */
+ 7569 "10110000" // /* MW 3 */
+ 7570 "00101110" // /* MW 2 */
+ 7571 "11111111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 30
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7573 "11110110" // /* MW 5 */
+ 7574 "01000111" // /* MW 4 */
+ 7575 "10111000" // /* MW 3 */
+ 7576 "00111110" // /* MW 2 */
+ 7577 "11111110" // /* MW 1 */
+ 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7579 "10110101" // /* MW 3 */
+ 7580 "11101001" // /* MW 2 */
+ 7581 "00001111" // /* MW 1 */
+ 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7583 "00000000" // /* MW 1 */
+ 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7585 "00000000" // /* MW 1 */
+ 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7587 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.src_ref 8 "superkernels.cpp" 477 16 first
+ 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */
+ 7589 "00000001" // /* MW 5 */
+ 7590 "01000000" // /* MW 4 */
+ 7591 "11110000" // /* MW 3 */
+ 7592 "00001111" // /* MW 2 */
+ 7593 "10000000" // /* MW 1 */
+.delay_slot
+ 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7595 "10010101" // /* MW 3 */
+ 7596 "11111101" // /* MW 2 */
+ 7597 "00001111" // /* MW 1 */
+.delay_slot
+ 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7599 "11010101" // /* MW 3 */
+ 7600 "11110101" // /* MW 2 */
+ 7601 "00001111" // /* MW 1 */
+.delay_slot
+ 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7603 "00011101" // /* MW 3 */
+ 7604 "11100000" // /* MW 2 */
+ 7605 "00001111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11
+.delay_slot
+ 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7607 "10010000" // /* MW 5 */
+ 7608 "11001001" // /* MW 4 */
+ 7609 "11001100" // /* MW 3 */
+ 7610 "00000111" // /* MW 2 */
+ 7611 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11 first
+.delay_slot
+ 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7613 "00110001" // /* MW 3 */
+ 7614 "00000110" // /* MW 2 */
+ 7615 "00001110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 5 "tile.hpp" 74 8
+.src_ref 5 "tile.hpp" 74 8
+ 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7617 "00010000" // /* MW 11 */
+ 7618 "01110110" // /* MW 10 */
+ 7619 "00110010" // /* MW 9 */
+ 7620 "11110001" // /* MW 8 */
+ 7621 "00000001" // /* MW 7 */
+ 7622 "00000000" // /* MW 6 */
+ 7623 "10001011" // /* MW 5 */
+ 7624 "10001000" // /* MW 4 */
+ 7625 "00000111" // /* MW 3 */
+ 7626 "00110001" // /* MW 2 */
+ 7627 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 74 8 first
+.src_ref 5 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7629 "00010001" // /* MW 9 */
+ 7630 "01111000" // /* MW 8 */
+ 7631 "00110010" // /* MW 7 */
+ 7632 "11110001" // /* MW 6 */
+ 7633 "00000001" // /* MW 5 */
+ 7634 "00000000" // /* MW 4 */
+ 7635 "00110000" // /* MW 3 */
+ 7636 "11000110" // /* MW 2 */
+ 7637 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+.src_ref 5 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7639 "10000001" // /* MW 5 */
+ 7640 "11000101" // /* MW 4 */
+ 7641 "11101100" // /* MW 3 */
+ 7642 "11000000" // /* MW 2 */
+ 7643 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */
+ 7645 "00000001" // /* MW 5 */
+ 7646 "00000000" // /* MW 4 */
+ 7647 "00001000" // /* MW 3 */
+ 7648 "00000101" // /* MW 2 */
+ 7649 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7651 "10000000" // /* MW 5 */
+ 7652 "11001000" // /* MW 4 */
+ 7653 "11000000" // /* MW 3 */
+ 7654 "00000111" // /* MW 2 */
+ 7655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7659 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7661 "00110001" // /* MW 3 */
+ 7662 "00100000" // /* MW 2 */
+ 7663 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7665 "00000000" // /* MW 15 */
+ 7666 "00000000" // /* MW 14 */
+ 7667 "01111000" // /* MW 13 */
+ 7668 "10100101" // /* MW 12 */
+ 7669 "00000001" // /* MW 11 */
+ 7670 "00000000" // /* MW 10 */
+ 7671 "00000000" // /* MW 9 */
+ 7672 "00000000" // /* MW 8 */
+ 7673 "01011011" // /* MW 7 */
+ 7674 "00000001" // /* MW 6 */
+ 7675 "00100000" // /* MW 5 */
+ 7676 "00000000" // /* MW 4 */
+ 7677 "11110000" // /* MW 3 */
+ 7678 "00101100" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 51
+.src_ref 8 "superkernels.cpp" 487 47
+.return_address
+ 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7681 "00010000" // /* MW 9 */
+ 7682 "00100000" // /* MW 8 */
+ 7683 "00110010" // /* MW 7 */
+ 7684 "11110001" // /* MW 6 */
+ 7685 "00000001" // /* MW 5 */
+ 7686 "00000000" // /* MW 4 */
+ 7687 "00000000" // /* MW 3 */
+ 7688 "00010001" // /* MW 2 */
+ 7689 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 51 first
+ 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7691 "00010000" // /* MW 9 */
+ 7692 "01100100" // /* MW 8 */
+ 7693 "00110010" // /* MW 7 */
+ 7694 "11110001" // /* MW 6 */
+ 7695 "00000001" // /* MW 5 */
+ 7696 "00000000" // /* MW 4 */
+ 7697 "11010000" // /* MW 3 */
+ 7698 "10111010" // /* MW 2 */
+ 7699 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 85
+ 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7701 "00010000" // /* MW 9 */
+ 7702 "00100010" // /* MW 8 */
+ 7703 "00110010" // /* MW 7 */
+ 7704 "11110001" // /* MW 6 */
+ 7705 "00000001" // /* MW 5 */
+ 7706 "00000000" // /* MW 4 */
+ 7707 "11010000" // /* MW 3 */
+ 7708 "11001010" // /* MW 2 */
+ 7709 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 85
+.src_ref 8 "superkernels.cpp" 482 16
+ 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7711 "00010000" // /* MW 9 */
+ 7712 "01101000" // /* MW 8 */
+ 7713 "10110010" // /* MW 7 */
+ 7714 "11110001" // /* MW 6 */
+ 7715 "00000001" // /* MW 5 */
+ 7716 "00000000" // /* MW 4 */
+ 7717 "11010000" // /* MW 3 */
+ 7718 "10110110" // /* MW 2 */
+ 7719 "01000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+.src_ref 8 "superkernels.cpp" 482 40 first
+ 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7721 "00010000" // /* MW 9 */
+ 7722 "01100110" // /* MW 8 */
+ 7723 "10110010" // /* MW 7 */
+ 7724 "11110000" // /* MW 6 */
+ 7725 "00000001" // /* MW 5 */
+ 7726 "00000000" // /* MW 4 */
+ 7727 "11010000" // /* MW 3 */
+ 7728 "10000101" // /* MW 2 */
+ 7729 "01000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 120 first
+.src_ref 8 "superkernels.cpp" 483 44
+ 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7731 "10000001" // /* MW 5 */
+ 7732 "00111001" // /* MW 4 */
+ 7733 "11011000" // /* MW 3 */
+ 7734 "10111110" // /* MW 2 */
+ 7735 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+ 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7737 "00010100" // /* MW 3 */
+ 7738 "01101000" // /* MW 2 */
+ 7739 "00011010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7741 "00000000" // /* MW 5 */
+ 7742 "11001010" // /* MW 4 */
+ 7743 "11001100" // /* MW 3 */
+ 7744 "00000111" // /* MW 2 */
+ 7745 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13
+ 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7747 "11010000" // /* MW 5 */
+ 7748 "11001001" // /* MW 4 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "00000111" // /* MW 2 */
+ 7751 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 27
+ 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7753 "00101111" // /* MW 3 */
+ 7754 "10100101" // /* MW 2 */
+ 7755 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7757 "00000000" // /* MW 5 */
+ 7758 "00100000" // /* MW 4 */
+ 7759 "00001000" // /* MW 3 */
+ 7760 "00000000" // /* MW 2 */
+ 7761 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 61
+.src_ref 8 "superkernels.cpp" 482 16 first
+ 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7763 "01011111" // /* MW 5 */
+ 7764 "11001010" // /* MW 4 */
+ 7765 "00110110" // /* MW 3 */
+ 7766 "10000101" // /* MW 2 */
+ 7767 "01100000" // /* MW 1 */
+ 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7769 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 96 first
+ 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7771 "00101111" // /* MW 3 */
+ 7772 "11100101" // /* MW 2 */
+ 7773 "00010011" // /* MW 1 */
+ 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7775 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+ 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7777 "01010001" // /* MW 3 */
+ 7778 "00000110" // /* MW 2 */
+ 7779 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 15 first
+ 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7781 "00101110" // /* MW 3 */
+ 7782 "01001100" // /* MW 2 */
+ 7783 "00000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7785 "00110001" // /* MW 3 */
+ 7786 "00011110" // /* MW 2 */
+ 7787 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7789 "00110001" // /* MW 3 */
+ 7790 "00011110" // /* MW 2 */
+ 7791 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7793 "00110001" // /* MW 3 */
+ 7794 "00011110" // /* MW 2 */
+ 7795 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7797 "00110001" // /* MW 3 */
+ 7798 "00011110" // /* MW 2 */
+ 7799 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7801 "00110001" // /* MW 3 */
+ 7802 "00011110" // /* MW 2 */
+ 7803 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7805 "00110001" // /* MW 3 */
+ 7806 "00011110" // /* MW 2 */
+ 7807 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13 first
+ 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7809 "00101001" // /* MW 3 */
+ 7810 "00000100" // /* MW 2 */
+ 7811 "00001000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7813 "00110001" // /* MW 3 */
+ 7814 "00011110" // /* MW 2 */
+ 7815 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7817 "00110001" // /* MW 3 */
+ 7818 "00011110" // /* MW 2 */
+ 7819 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7821 "00110001" // /* MW 3 */
+ 7822 "00011110" // /* MW 2 */
+ 7823 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40 first
+ 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7825 "00110110" // /* MW 3 */
+ 7826 "11011100" // /* MW 2 */
+ 7827 "00000010" // /* MW 1 */
+ 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7829 "00000000" // /* MW 1 */
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+ 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7837 "00000000" // /* MW 1 */
+ 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7839 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7841 "00001011" // /* MW 3 */
+ 7842 "01100011" // /* MW 2 */
+ 7843 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */
+ 7845 "00000001" // /* MW 5 */
+ 7846 "01000000" // /* MW 4 */
+ 7847 "01111000" // /* MW 3 */
+ 7848 "00001111" // /* MW 2 */
+ 7849 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7851 "11000000" // /* MW 3 */
+ 7852 "00011110" // /* MW 2 */
+ 7853 "00011011" // /* MW 1 */
+.delay_slot
+ 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7855 "00011101" // /* MW 3 */
+ 7856 "11011001" // /* MW 2 */
+ 7857 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7863 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7865 "00000001" // /* MW 5 */
+ 7866 "00000000" // /* MW 4 */
+ 7867 "01010000" // /* MW 3 */
+ 7868 "00010101" // /* MW 2 */
+ 7869 "00000000" // /* MW 1 */
+.delay_slot
+ 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7871 "10010101" // /* MW 3 */
+ 7872 "11011101" // /* MW 2 */
+ 7873 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7881 "00011100" // /* MW 7 */
+ 7882 "00000000" // /* MW 6 */
+ 7883 "00000000" // /* MW 5 */
+ 7884 "00000100" // /* MW 4 */
+ 7885 "11110000" // /* MW 3 */
+ 7886 "00101100" // /* MW 2 */
+ 7887 "00000000" // /* MW 1 */
+.return_address
+ 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */
+ 7889 "00000000" // /* MW 5 */
+ 7890 "00000000" // /* MW 4 */
+ 7891 "10011000" // /* MW 3 */
+ 7892 "00001111" // /* MW 2 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7895 "11100000" // /* MW 5 */
+ 7896 "11001001" // /* MW 4 */
+ 7897 "11001110" // /* MW 3 */
+ 7898 "00000111" // /* MW 2 */
+ 7899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7907 "00011100" // /* MW 13 */
+ 7908 "00000000" // /* MW 12 */
+ 7909 "00000000" // /* MW 11 */
+ 7910 "01010111" // /* MW 10 */
+ 7911 "00011010" // /* MW 9 */
+ 7912 "01000000" // /* MW 8 */
+ 7913 "00000000" // /* MW 7 */
+ 7914 "00000000" // /* MW 6 */
+ 7915 "10110110" // /* MW 5 */
+ 7916 "00000010" // /* MW 4 */
+ 7917 "11110000" // /* MW 3 */
+ 7918 "00101100" // /* MW 2 */
+ 7919 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7921 "00000001" // /* MW 5 */
+ 7922 "00000000" // /* MW 4 */
+ 7923 "01010000" // /* MW 3 */
+ 7924 "00010101" // /* MW 2 */
+ 7925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7933 "01100111" // /* MW 3 */
+ 7934 "00000001" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7937 "00000000" // /* MW 15 */
+ 7938 "00000000" // /* MW 14 */
+ 7939 "01111000" // /* MW 13 */
+ 7940 "10100101" // /* MW 12 */
+ 7941 "00000001" // /* MW 11 */
+ 7942 "00001100" // /* MW 10 */
+ 7943 "00011000" // /* MW 9 */
+ 7944 "00000010" // /* MW 8 */
+ 7945 "01011011" // /* MW 7 */
+ 7946 "00000001" // /* MW 6 */
+ 7947 "00100000" // /* MW 5 */
+ 7948 "00000000" // /* MW 4 */
+ 7949 "11110000" // /* MW 3 */
+ 7950 "00101100" // /* MW 2 */
+ 7951 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+.no_stack_arguments
+ 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */
+ 7953 "00000001" // /* MW 5 */
+ 7954 "00000000" // /* MW 4 */
+ 7955 "01000000" // /* MW 3 */
+ 7956 "00011000" // /* MW 2 */
+ 7957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7959 "00100000" // /* MW 3 */
+ 7960 "01010000" // /* MW 2 */
+ 7961 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7963 "11100000" // /* MW 5 */
+ 7964 "11001001" // /* MW 4 */
+ 7965 "11001110" // /* MW 3 */
+ 7966 "00000111" // /* MW 2 */
+ 7967 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7969 "00000000" // /* MW 5 */
+ 7970 "00100000" // /* MW 4 */
+ 7971 "00000001" // /* MW 3 */
+ 7972 "00000000" // /* MW 2 */
+ 7973 "01001111" // /* MW 1 */
+.delay_slot
+ 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "10010101" // /* MW 3 */
+ 7976 "11011101" // /* MW 2 */
+ 7977 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7979 "00100000" // /* MW 5 */
+ 7980 "00000000" // /* MW 4 */
+ 7981 "11110000" // /* MW 3 */
+ 7982 "00101100" // /* MW 2 */
+ 7983 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+ 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7985 "10111000" // /* MW 9 */
+ 7986 "00001000" // /* MW 8 */
+ 7987 "00000000" // /* MW 7 */
+ 7988 "00000000" // /* MW 6 */
+ 7989 "11010010" // /* MW 5 */
+ 7990 "00000010" // /* MW 4 */
+ 7991 "01010000" // /* MW 3 */
+ 7992 "11000000" // /* MW 2 */
+ 7993 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 492 38
+.src_ref 8 "superkernels.cpp" 492 38
+ 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7995 "01111000" // /* MW 9 */
+ 7996 "01001001" // /* MW 8 */
+ 7997 "00000000" // /* MW 7 */
+ 7998 "00001000" // /* MW 6 */
+ 7999 "10000000" // /* MW 5 */
+ 8000 "00000001" // /* MW 4 */
+ 8001 "10000000" // /* MW 3 */
+ 8002 "01000000" // /* MW 2 */
+ 8003 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+.src_ref 8 "superkernels.cpp" 498 15
+ 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8005 "00010000" // /* MW 9 */
+ 8006 "01101010" // /* MW 8 */
+ 8007 "10110010" // /* MW 7 */
+ 8008 "11110001" // /* MW 6 */
+ 8009 "00000001" // /* MW 5 */
+ 8010 "00000000" // /* MW 4 */
+ 8011 "00100000" // /* MW 3 */
+ 8012 "00100011" // /* MW 2 */
+ 8013 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8015 "10110000" // /* MW 5 */
+ 8016 "11001001" // /* MW 4 */
+ 8017 "11000010" // /* MW 3 */
+ 8018 "00000111" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8021 "10100000" // /* MW 5 */
+ 8022 "11001001" // /* MW 4 */
+ 8023 "11001110" // /* MW 3 */
+ 8024 "00000111" // /* MW 2 */
+ 8025 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8027 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 38
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8029 "00010111" // /* MW 3 */
+ 8030 "00011110" // /* MW 2 */
+ 8031 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8033 "10000000" // /* MW 3 */
+ 8034 "00111010" // /* MW 2 */
+ 8035 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8037 "00010110" // /* MW 3 */
+ 8038 "01000000" // /* MW 2 */
+ 8039 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8041 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8043 "00000001" // /* MW 3 */
+ 8044 "00000001" // /* MW 2 */
+ 8045 "00011100" // /* MW 1 */
+ 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8047 "00000000" // /* MW 1 */
+ 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8049 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 492 38 first
+ 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8051 "00000111" // /* MW 3 */
+ 8052 "00001011" // /* MW 2 */
+ 8053 "00000110" // /* MW 1 */
+ 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8055 "00000000" // /* MW 1 */
+ 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8057 "00000000" // /* MW 1 */
+ 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8059 "00000000" // /* MW 1 */
+ 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8061 "00000000" // /* MW 1 */
+ 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8063 "00000000" // /* MW 1 */
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 494 25 first
+ 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8067 "11010001" // /* MW 3 */
+ 8068 "00011101" // /* MW 2 */
+ 8069 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 495 24 first
+ 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8071 "11110001" // /* MW 3 */
+ 8072 "00000101" // /* MW 2 */
+ 8073 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 496 24 first
+ 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8075 "10110001" // /* MW 3 */
+ 8076 "00010101" // /* MW 2 */
+ 8077 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 15 first
+ 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8079 "00101110" // /* MW 3 */
+ 8080 "00011100" // /* MW 2 */
+ 8081 "00000010" // /* MW 1 */
+ 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8083 "00000000" // /* MW 1 */
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+ 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8087 "00000000" // /* MW 1 */
+ 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8089 "00000000" // /* MW 1 */
+ 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8091 "00000000" // /* MW 1 */
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+ 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "00101001" // /* MW 3 */
+ 8096 "00000100" // /* MW 2 */
+ 8097 "00001011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 16 first
+ 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8099 "00101110" // /* MW 3 */
+ 8100 "00000100" // /* MW 2 */
+ 8101 "00000010" // /* MW 1 */
+ 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8103 "00000000" // /* MW 1 */
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8105 "00000000" // /* MW 1 */
+ 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8107 "00000000" // /* MW 1 */
+ 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8109 "00000000" // /* MW 1 */
+ 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8111 "00000000" // /* MW 1 */
+ 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8113 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8115 "00101001" // /* MW 3 */
+ 8116 "00000100" // /* MW 2 */
+ 8117 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 15 first
+ 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8119 "00101110" // /* MW 3 */
+ 8120 "00010100" // /* MW 2 */
+ 8121 "00000010" // /* MW 1 */
+ 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8123 "00000000" // /* MW 1 */
+ 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */
+ 8125 "00000000" // /* MW 5 */
+ 8126 "00000000" // /* MW 4 */
+ 8127 "11111000" // /* MW 3 */
+ 8128 "00001111" // /* MW 2 */
+ 8129 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8131 "10111000" // /* MW 5 */
+ 8132 "11001001" // /* MW 4 */
+ 8133 "11000000" // /* MW 3 */
+ 8134 "00000111" // /* MW 2 */
+ 8135 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8141 "01100111" // /* MW 3 */
+ 8142 "00000001" // /* MW 2 */
+ 8143 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8145 "00000000" // /* MW 15 */
+ 8146 "00000000" // /* MW 14 */
+ 8147 "01111000" // /* MW 13 */
+ 8148 "10100101" // /* MW 12 */
+ 8149 "00000001" // /* MW 11 */
+ 8150 "00000000" // /* MW 10 */
+ 8151 "00000000" // /* MW 9 */
+ 8152 "10000000" // /* MW 8 */
+ 8153 "00101001" // /* MW 7 */
+ 8154 "00000100" // /* MW 6 */
+ 8155 "00100000" // /* MW 5 */
+ 8156 "00000000" // /* MW 4 */
+ 8157 "11110000" // /* MW 3 */
+ 8158 "00101100" // /* MW 2 */
+ 8159 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8161 "00010001" // /* MW 9 */
+ 8162 "01101000" // /* MW 8 */
+ 8163 "10110010" // /* MW 7 */
+ 8164 "11110011" // /* MW 6 */
+ 8165 "00000001" // /* MW 5 */
+ 8166 "00000000" // /* MW 4 */
+ 8167 "10110000" // /* MW 3 */
+ 8168 "10100011" // /* MW 2 */
+ 8169 "11111011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8171 "10000001" // /* MW 5 */
+ 8172 "00101001" // /* MW 4 */
+ 8173 "11110110" // /* MW 3 */
+ 8174 "00101100" // /* MW 2 */
+ 8175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+.src_ref 8 "superkernels.cpp" 505 7 first
+.src_ref 8 "superkernels.cpp" 505 19
+ 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8177 "00001010" // /* MW 5 */
+ 8178 "01000100" // /* MW 4 */
+ 8179 "11010000" // /* MW 3 */
+ 8180 "11000010" // /* MW 2 */
+ 8181 "11100000" // /* MW 1 */
+ 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8183 "00000000" // /* MW 1 */
+ 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8185 "00000000" // /* MW 1 */
+ 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8187 "00000000" // /* MW 1 */
+ 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8189 "00000000" // /* MW 1 */
+ 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8191 "00000000" // /* MW 1 */
+ 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8193 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 19
+ 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8195 "00001000" // /* MW 3 */
+ 8196 "01100011" // /* MW 2 */
+ 8197 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 25
+ 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */
+ 8199 "00000001" // /* MW 5 */
+ 8200 "01000000" // /* MW 4 */
+ 8201 "01011000" // /* MW 3 */
+ 8202 "00010000" // /* MW 2 */
+ 8203 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.delay_slot
+ 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8205 "00000110" // /* MW 3 */
+ 8206 "01100110" // /* MW 2 */
+ 8207 "00011110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8211 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8213 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8215 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29
+ 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8217 "10001000" // /* MW 5 */
+ 8218 "11001001" // /* MW 4 */
+ 8219 "11000100" // /* MW 3 */
+ 8220 "00000111" // /* MW 2 */
+ 8221 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29 first
+.src_ref 8 "superkernels.cpp" 505 65
+ 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8223 "00010000" // /* MW 9 */
+ 8224 "00110000" // /* MW 8 */
+ 8225 "00110010" // /* MW 7 */
+ 8226 "11110001" // /* MW 6 */
+ 8227 "00000001" // /* MW 5 */
+ 8228 "00000000" // /* MW 4 */
+ 8229 "11010000" // /* MW 3 */
+ 8230 "11000010" // /* MW 2 */
+ 8231 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 65
+ 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8233 "00111010" // /* MW 3 */
+ 8234 "00000100" // /* MW 2 */
+ 8235 "00000010" // /* MW 1 */
+ 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8237 "00000000" // /* MW 1 */
+ 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8239 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.no_stack_arguments
+ 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8241 "00000001" // /* MW 5 */
+ 8242 "00000000" // /* MW 4 */
+ 8243 "11111000" // /* MW 3 */
+ 8244 "00010011" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8247 "00000001" // /* MW 3 */
+ 8248 "00011010" // /* MW 2 */
+ 8249 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8251 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8253 "11011010" // /* MW 3 */
+ 8254 "00110110" // /* MW 2 */
+ 8255 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8257 "01000001" // /* MW 5 */
+ 8258 "10111011" // /* MW 4 */
+ 8259 "00110111" // /* MW 3 */
+ 8260 "01100000" // /* MW 2 */
+ 8261 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8263 "00010010" // /* MW 9 */
+ 8264 "00000001" // /* MW 8 */
+ 8265 "00000100" // /* MW 7 */
+ 8266 "00000000" // /* MW 6 */
+ 8267 "01011011" // /* MW 5 */
+ 8268 "00000001" // /* MW 4 */
+ 8269 "11110000" // /* MW 3 */
+ 8270 "00101100" // /* MW 2 */
+ 8271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.return_address
+ 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8273 "01000001" // /* MW 5 */
+ 8274 "10101111" // /* MW 4 */
+ 8275 "00111101" // /* MW 3 */
+ 8276 "00000110" // /* MW 2 */
+ 8277 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+ 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8279 "00000010" // /* MW 3 */
+ 8280 "11100001" // /* MW 2 */
+ 8281 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 6
+.src_ref 8 "superkernels.cpp" 505 76
+ 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */
+ 8283 "00000001" // /* MW 5 */
+ 8284 "01000000" // /* MW 4 */
+ 8285 "01010000" // /* MW 3 */
+ 8286 "00010000" // /* MW 2 */
+ 8287 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8299 "10000001" // /* MW 5 */
+ 8300 "11011001" // /* MW 4 */
+ 8301 "10100100" // /* MW 3 */
+ 8302 "00011111" // /* MW 2 */
+ 8303 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8305 "01110110" // /* MW 3 */
+ 8306 "11111111" // /* MW 2 */
+ 8307 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8309 "00110110" // /* MW 3 */
+ 8310 "11111110" // /* MW 2 */
+ 8311 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8313 "01010110" // /* MW 3 */
+ 8314 "11111110" // /* MW 2 */
+ 8315 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8317 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8319 "00110110" // /* MW 3 */
+ 8320 "01000110" // /* MW 2 */
+ 8321 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8325 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8327 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8329 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8331 "00010010" // /* MW 3 */
+ 8332 "10100011" // /* MW 2 */
+ 8333 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8335 "00110001" // /* MW 3 */
+ 8336 "00000110" // /* MW 2 */
+ 8337 "00001010" // /* MW 1 */
+ 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8339 "00000000" // /* MW 1 */
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+ 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8345 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8347 "00010000" // /* MW 5 */
+ 8348 "10100110" // /* MW 4 */
+ 8349 "11111000" // /* MW 3 */
+ 8350 "00101100" // /* MW 2 */
+ 8351 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8353 "00000000" // /* MW 1 */
+ 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8355 "00000000" // /* MW 1 */
+ 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8357 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 7 first
+ 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8359 "01111110" // /* MW 9 */
+ 8360 "10100101" // /* MW 8 */
+ 8361 "00000001" // /* MW 7 */
+ 8362 "00000000" // /* MW 6 */
+ 8363 "00010000" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11010000" // /* MW 3 */
+ 8366 "11000010" // /* MW 2 */
+ 8367 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+.src_ref 8 "superkernels.cpp" 508 19
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 8 "superkernels.cpp" 558 19
+ 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8369 "00001001" // /* MW 3 */
+ 8370 "00011100" // /* MW 2 */
+ 8371 "00010000" // /* MW 1 */
+ 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8373 "00000000" // /* MW 1 */
+ 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8375 "00000000" // /* MW 1 */
+ 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8377 "00000000" // /* MW 1 */
+ 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8379 "00000000" // /* MW 1 */
+ 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8381 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 19
+ 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8383 "00001000" // /* MW 3 */
+ 8384 "10100001" // /* MW 2 */
+ 8385 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 25
+ 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8387 "00000001" // /* MW 5 */
+ 8388 "01000000" // /* MW 4 */
+ 8389 "10110000" // /* MW 3 */
+ 8390 "00010000" // /* MW 2 */
+ 8391 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8401 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+ 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8403 "11000000" // /* MW 5 */
+ 8404 "11001001" // /* MW 4 */
+ 8405 "11000100" // /* MW 3 */
+ 8406 "00000111" // /* MW 2 */
+ 8407 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+.src_ref 8 "superkernels.cpp" 508 65
+ 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8409 "00010000" // /* MW 9 */
+ 8410 "00110000" // /* MW 8 */
+ 8411 "00110010" // /* MW 7 */
+ 8412 "11110001" // /* MW 6 */
+ 8413 "00000001" // /* MW 5 */
+ 8414 "00000000" // /* MW 4 */
+ 8415 "11010000" // /* MW 3 */
+ 8416 "11000010" // /* MW 2 */
+ 8417 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 65
+ 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8419 "00111010" // /* MW 3 */
+ 8420 "00000100" // /* MW 2 */
+ 8421 "00000010" // /* MW 1 */
+ 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8423 "00000000" // /* MW 1 */
+ 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8425 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.no_stack_arguments
+ 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8427 "00000001" // /* MW 5 */
+ 8428 "00000000" // /* MW 4 */
+ 8429 "11111000" // /* MW 3 */
+ 8430 "00010011" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8433 "00000001" // /* MW 3 */
+ 8434 "00011010" // /* MW 2 */
+ 8435 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8437 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8439 "11011010" // /* MW 3 */
+ 8440 "00110110" // /* MW 2 */
+ 8441 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8443 "01000001" // /* MW 5 */
+ 8444 "10111011" // /* MW 4 */
+ 8445 "00110111" // /* MW 3 */
+ 8446 "01100000" // /* MW 2 */
+ 8447 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "01111000" // /* MW 13 */
+ 8452 "10100101" // /* MW 12 */
+ 8453 "00000001" // /* MW 11 */
+ 8454 "10010000" // /* MW 10 */
+ 8455 "00001000" // /* MW 9 */
+ 8456 "00100000" // /* MW 8 */
+ 8457 "01011011" // /* MW 7 */
+ 8458 "00000001" // /* MW 6 */
+ 8459 "00100000" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.return_address
+ 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8465 "01000001" // /* MW 5 */
+ 8466 "10101111" // /* MW 4 */
+ 8467 "00111101" // /* MW 3 */
+ 8468 "00000110" // /* MW 2 */
+ 8469 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+ 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8471 "00000010" // /* MW 3 */
+ 8472 "11100001" // /* MW 2 */
+ 8473 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 6
+.src_ref 8 "superkernels.cpp" 508 76
+ 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8475 "00000001" // /* MW 5 */
+ 8476 "01000000" // /* MW 4 */
+ 8477 "10110000" // /* MW 3 */
+ 8478 "00010000" // /* MW 2 */
+ 8479 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8487 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8489 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8491 "10000001" // /* MW 5 */
+ 8492 "11011001" // /* MW 4 */
+ 8493 "10100100" // /* MW 3 */
+ 8494 "00011111" // /* MW 2 */
+ 8495 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8497 "01110110" // /* MW 3 */
+ 8498 "11111111" // /* MW 2 */
+ 8499 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8501 "00110110" // /* MW 3 */
+ 8502 "11111110" // /* MW 2 */
+ 8503 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "01010110" // /* MW 3 */
+ 8506 "11111110" // /* MW 2 */
+ 8507 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8511 "00110110" // /* MW 3 */
+ 8512 "01000110" // /* MW 2 */
+ 8513 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8515 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00010010" // /* MW 3 */
+ 8524 "10100011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8527 "00110001" // /* MW 3 */
+ 8528 "00000110" // /* MW 2 */
+ 8529 "00001010" // /* MW 1 */
+ 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8531 "00000000" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+ 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8537 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8539 "00010000" // /* MW 5 */
+ 8540 "10100110" // /* MW 4 */
+ 8541 "11111000" // /* MW 3 */
+ 8542 "00101100" // /* MW 2 */
+ 8543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8545 "00000000" // /* MW 1 */
+ 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8547 "00000000" // /* MW 1 */
+ 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8549 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 7 first
+.src_ref 8 "superkernels.cpp" 511 29
+ 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8551 "00010000" // /* MW 9 */
+ 8552 "01110010" // /* MW 8 */
+ 8553 "10110010" // /* MW 7 */
+ 8554 "11110011" // /* MW 6 */
+ 8555 "00000001" // /* MW 5 */
+ 8556 "00000000" // /* MW 4 */
+ 8557 "11010000" // /* MW 3 */
+ 8558 "11000010" // /* MW 2 */
+ 8559 "11100000" // /* MW 1 */
+ 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8561 "00000000" // /* MW 1 */
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+ 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8565 "00000000" // /* MW 1 */
+ 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8567 "00000000" // /* MW 1 */
+ 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8569 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8571 "00010001" // /* MW 3 */
+ 8572 "00100100" // /* MW 2 */
+ 8573 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8575 "00001000" // /* MW 3 */
+ 8576 "10100001" // /* MW 2 */
+ 8577 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 25
+ 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */
+ 8579 "00000001" // /* MW 5 */
+ 8580 "01000000" // /* MW 4 */
+ 8581 "00100000" // /* MW 3 */
+ 8582 "00010001" // /* MW 2 */
+ 8583 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+.delay_slot
+ 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8585 "11000000" // /* MW 5 */
+ 8586 "11001000" // /* MW 4 */
+ 8587 "11000100" // /* MW 3 */
+ 8588 "00000111" // /* MW 2 */
+ 8589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8591 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8593 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8595 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8597 "00000001" // /* MW 3 */
+ 8598 "00100010" // /* MW 2 */
+ 8599 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 29
+.src_ref 8 "superkernels.cpp" 511 42
+ 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8601 "00000010" // /* MW 5 */
+ 8602 "00110100" // /* MW 4 */
+ 8603 "11010000" // /* MW 3 */
+ 8604 "11000010" // /* MW 2 */
+ 8605 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+ 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8607 "00111010" // /* MW 3 */
+ 8608 "00000100" // /* MW 2 */
+ 8609 "00000010" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.no_stack_arguments
+ 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8615 "00000001" // /* MW 5 */
+ 8616 "00000000" // /* MW 4 */
+ 8617 "11111000" // /* MW 3 */
+ 8618 "00010011" // /* MW 2 */
+ 8619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8621 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8623 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8625 "00011010" // /* MW 3 */
+ 8626 "00110111" // /* MW 2 */
+ 8627 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8629 "01000001" // /* MW 5 */
+ 8630 "10111011" // /* MW 4 */
+ 8631 "00110111" // /* MW 3 */
+ 8632 "01100000" // /* MW 2 */
+ 8633 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8635 "00100100" // /* MW 5 */
+ 8636 "00000010" // /* MW 4 */
+ 8637 "11111000" // /* MW 3 */
+ 8638 "00101100" // /* MW 2 */
+ 8639 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.return_address
+ 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8641 "01000001" // /* MW 5 */
+ 8642 "10101111" // /* MW 4 */
+ 8643 "00111101" // /* MW 3 */
+ 8644 "00000110" // /* MW 2 */
+ 8645 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+ 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00000010" // /* MW 3 */
+ 8648 "11100001" // /* MW 2 */
+ 8649 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 6
+.src_ref 8 "superkernels.cpp" 511 77
+ 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */
+ 8651 "00000001" // /* MW 5 */
+ 8652 "01000000" // /* MW 4 */
+ 8653 "00010000" // /* MW 3 */
+ 8654 "00010001" // /* MW 2 */
+ 8655 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8667 "01011000" // /* MW 9 */
+ 8668 "00000001" // /* MW 8 */
+ 8669 "00001000" // /* MW 7 */
+ 8670 "11101010" // /* MW 6 */
+ 8671 "00010111" // /* MW 5 */
+ 8672 "00111111" // /* MW 4 */
+ 8673 "11010000" // /* MW 3 */
+ 8674 "11101110" // /* MW 2 */
+ 8675 "11011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8677 "01010110" // /* MW 3 */
+ 8678 "11111110" // /* MW 2 */
+ 8679 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8681 "01110110" // /* MW 3 */
+ 8682 "11111110" // /* MW 2 */
+ 8683 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8685 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8687 "01010110" // /* MW 3 */
+ 8688 "01000110" // /* MW 2 */
+ 8689 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8691 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8693 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8695 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8699 "00100010" // /* MW 3 */
+ 8700 "11100101" // /* MW 2 */
+ 8701 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "01010001" // /* MW 3 */
+ 8704 "00000110" // /* MW 2 */
+ 8705 "00001110" // /* MW 1 */
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+ 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8709 "00000000" // /* MW 1 */
+ 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8711 "00000000" // /* MW 5 */
+ 8712 "00000000" // /* MW 4 */
+ 8713 "00101000" // /* MW 3 */
+ 8714 "00010001" // /* MW 2 */
+ 8715 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8717 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+.delay_slot
+ 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8719 "00011000" // /* MW 3 */
+ 8720 "10010011" // /* MW 2 */
+ 8721 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8727 "01111110" // /* MW 9 */
+ 8728 "10100101" // /* MW 8 */
+ 8729 "00000001" // /* MW 7 */
+ 8730 "00000000" // /* MW 6 */
+ 8731 "00010000" // /* MW 5 */
+ 8732 "00000000" // /* MW 4 */
+ 8733 "11110000" // /* MW 3 */
+ 8734 "00101100" // /* MW 2 */
+ 8735 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8737 "00000000" // /* MW 5 */
+ 8738 "00000000" // /* MW 4 */
+ 8739 "00101000" // /* MW 3 */
+ 8740 "00010001" // /* MW 2 */
+ 8741 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8743 "00000101" // /* MW 3 */
+ 8744 "00100000" // /* MW 2 */
+ 8745 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8753 "00000000" // /* MW 15 */
+ 8754 "00000000" // /* MW 14 */
+ 8755 "01111000" // /* MW 13 */
+ 8756 "10100101" // /* MW 12 */
+ 8757 "00000001" // /* MW 11 */
+ 8758 "00000000" // /* MW 10 */
+ 8759 "00000000" // /* MW 9 */
+ 8760 "00000000" // /* MW 8 */
+ 8761 "01011011" // /* MW 7 */
+ 8762 "00000001" // /* MW 6 */
+ 8763 "00100000" // /* MW 5 */
+ 8764 "00000000" // /* MW 4 */
+ 8765 "11110000" // /* MW 3 */
+ 8766 "00101100" // /* MW 2 */
+ 8767 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+ 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8769 "00000000" // /* MW 15 */
+ 8770 "00000000" // /* MW 14 */
+ 8771 "01111000" // /* MW 13 */
+ 8772 "10100101" // /* MW 12 */
+ 8773 "00000001" // /* MW 11 */
+ 8774 "00101000" // /* MW 10 */
+ 8775 "00000000" // /* MW 9 */
+ 8776 "00000001" // /* MW 8 */
+ 8777 "01011011" // /* MW 7 */
+ 8778 "00000001" // /* MW 6 */
+ 8779 "00100000" // /* MW 5 */
+ 8780 "00000000" // /* MW 4 */
+ 8781 "11110000" // /* MW 3 */
+ 8782 "00101100" // /* MW 2 */
+ 8783 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+.src_ref 8 "superkernels.cpp" 516 47
+.src_ref 1 "io_buffer_main.h" 125 25
+ 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8785 "00010000" // /* MW 9 */
+ 8786 "01100110" // /* MW 8 */
+ 8787 "00110010" // /* MW 7 */
+ 8788 "11110011" // /* MW 6 */
+ 8789 "00000001" // /* MW 5 */
+ 8790 "00000000" // /* MW 4 */
+ 8791 "00100000" // /* MW 3 */
+ 8792 "01110011" // /* MW 2 */
+ 8793 "11111100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 47 first
+.src_ref 8 "superkernels.cpp" 522 6
+ 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8795 "00010000" // /* MW 9 */
+ 8796 "01101000" // /* MW 8 */
+ 8797 "00110010" // /* MW 7 */
+ 8798 "11110001" // /* MW 6 */
+ 8799 "00000001" // /* MW 5 */
+ 8800 "00000000" // /* MW 4 */
+ 8801 "11010000" // /* MW 3 */
+ 8802 "11010110" // /* MW 2 */
+ 8803 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8805 "00010000" // /* MW 9 */
+ 8806 "01100000" // /* MW 8 */
+ 8807 "00110010" // /* MW 7 */
+ 8808 "11110011" // /* MW 6 */
+ 8809 "00000001" // /* MW 5 */
+ 8810 "00000000" // /* MW 4 */
+ 8811 "11010000" // /* MW 3 */
+ 8812 "11000110" // /* MW 2 */
+ 8813 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+ 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "10010110" // /* MW 3 */
+ 8816 "00000110" // /* MW 2 */
+ 8817 "00000110" // /* MW 1 */
+ 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8819 "00000000" // /* MW 1 */
+ 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8821 "00000000" // /* MW 1 */
+ 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8823 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8825 "01110110" // /* MW 3 */
+ 8826 "00000110" // /* MW 2 */
+ 8827 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+ 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8829 "00001101" // /* MW 3 */
+ 8830 "01101011" // /* MW 2 */
+ 8831 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8833 "00000111" // /* MW 3 */
+ 8834 "01100001" // /* MW 2 */
+ 8835 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */
+ 8837 "00000001" // /* MW 5 */
+ 8838 "01000000" // /* MW 4 */
+ 8839 "00001000" // /* MW 3 */
+ 8840 "00010010" // /* MW 2 */
+ 8841 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+.delay_slot
+ 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8843 "00000111" // /* MW 3 */
+ 8844 "00101000" // /* MW 2 */
+ 8845 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.delay_slot
+ 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8847 "10010001" // /* MW 3 */
+ 8848 "00000110" // /* MW 2 */
+ 8849 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8851 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+.delay_slot
+ 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8853 "11010101" // /* MW 3 */
+ 8854 "01101001" // /* MW 2 */
+ 8855 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 12
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8857 "00100010" // /* MW 5 */
+ 8858 "01001000" // /* MW 4 */
+ 8859 "10110000" // /* MW 3 */
+ 8860 "10000011" // /* MW 2 */
+ 8861 "11110111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8863 "00100111" // /* MW 3 */
+ 8864 "01100001" // /* MW 2 */
+ 8865 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */
+ 8867 "00000001" // /* MW 5 */
+ 8868 "01000000" // /* MW 4 */
+ 8869 "11000000" // /* MW 3 */
+ 8870 "00010001" // /* MW 2 */
+ 8871 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8873 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8881 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8883 "11101000" // /* MW 3 */
+ 8884 "01100000" // /* MW 2 */
+ 8885 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */
+ 8887 "00000001" // /* MW 5 */
+ 8888 "01000000" // /* MW 4 */
+ 8889 "10101000" // /* MW 3 */
+ 8890 "00010001" // /* MW 2 */
+ 8891 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26
+.delay_slot
+ 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8893 "11000000" // /* MW 5 */
+ 8894 "11001001" // /* MW 4 */
+ 8895 "11001100" // /* MW 3 */
+ 8896 "00000111" // /* MW 2 */
+ 8897 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26 first
+.src_ref 8 "superkernels.cpp" 523 61
+ 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8907 "00010000" // /* MW 9 */
+ 8908 "00100100" // /* MW 8 */
+ 8909 "00110010" // /* MW 7 */
+ 8910 "11110011" // /* MW 6 */
+ 8911 "00000001" // /* MW 5 */
+ 8912 "00000000" // /* MW 4 */
+ 8913 "11010000" // /* MW 3 */
+ 8914 "11001010" // /* MW 2 */
+ 8915 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 61
+.src_ref 8 "superkernels.cpp" 524 44
+ 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8917 "00010000" // /* MW 9 */
+ 8918 "01101010" // /* MW 8 */
+ 8919 "00110010" // /* MW 7 */
+ 8920 "11110011" // /* MW 6 */
+ 8921 "00000001" // /* MW 5 */
+ 8922 "00000000" // /* MW 4 */
+ 8923 "11010000" // /* MW 3 */
+ 8924 "11000010" // /* MW 2 */
+ 8925 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+.src_ref 8 "superkernels.cpp" 524 44 first
+ 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8927 "00000010" // /* MW 5 */
+ 8928 "01100000" // /* MW 4 */
+ 8929 "11010000" // /* MW 3 */
+ 8930 "11000110" // /* MW 2 */
+ 8931 "11000000" // /* MW 1 */
+ 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8933 "00000000" // /* MW 1 */
+ 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8935 "00000000" // /* MW 1 */
+ 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8937 "00000000" // /* MW 1 */
+ 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8939 "00000000" // /* MW 1 */
+ 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8941 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 37 first
+ 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8943 "00001111" // /* MW 3 */
+ 8944 "10100101" // /* MW 2 */
+ 8945 "00010100" // /* MW 1 */
+ 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8947 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30 first
+.src_ref 8 "superkernels.cpp" 524 30 first
+ 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8949 "10000010" // /* MW 5 */
+ 8950 "00110010" // /* MW 4 */
+ 8951 "00111010" // /* MW 3 */
+ 8952 "11100100" // /* MW 2 */
+ 8953 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8955 "00011100" // /* MW 3 */
+ 8956 "00110111" // /* MW 2 */
+ 8957 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8959 "00000010" // /* MW 3 */
+ 8960 "11100111" // /* MW 2 */
+ 8961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 42
+ 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8963 "00011100" // /* MW 3 */
+ 8964 "10110111" // /* MW 2 */
+ 8965 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8967 "00110010" // /* MW 3 */
+ 8968 "00100011" // /* MW 2 */
+ 8969 "00010110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 65 first
+ 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8971 "00010001" // /* MW 3 */
+ 8972 "00100101" // /* MW 2 */
+ 8973 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 526 36 first
+ 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8975 "00001000" // /* MW 3 */
+ 8976 "01100001" // /* MW 2 */
+ 8977 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 8979 "00000001" // /* MW 5 */
+ 8980 "01000000" // /* MW 4 */
+ 8981 "01000000" // /* MW 3 */
+ 8982 "00010010" // /* MW 2 */
+ 8983 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32
+.delay_slot
+ 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8985 "00100000" // /* MW 5 */
+ 8986 "11001010" // /* MW 4 */
+ 8987 "11001100" // /* MW 3 */
+ 8988 "00000111" // /* MW 2 */
+ 8989 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32 first
+.delay_slot
+ 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8991 "01010001" // /* MW 3 */
+ 8992 "00000110" // /* MW 2 */
+ 8993 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8999 "00000000" // /* MW 1 */
+ 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9001 "00000000" // /* MW 5 */
+ 9002 "00000000" // /* MW 4 */
+ 9003 "11111000" // /* MW 3 */
+ 9004 "00010001" // /* MW 2 */
+ 9005 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9007 "00010000" // /* MW 9 */
+ 9008 "01101000" // /* MW 8 */
+ 9009 "10110010" // /* MW 7 */
+ 9010 "11110011" // /* MW 6 */
+ 9011 "00000001" // /* MW 5 */
+ 9012 "00000000" // /* MW 4 */
+ 9013 "00000000" // /* MW 3 */
+ 9014 "01001110" // /* MW 2 */
+ 9015 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9017 "00010000" // /* MW 9 */
+ 9018 "00100000" // /* MW 8 */
+ 9019 "00110010" // /* MW 7 */
+ 9020 "11110001" // /* MW 6 */
+ 9021 "00000001" // /* MW 5 */
+ 9022 "00000000" // /* MW 4 */
+ 9023 "00000000" // /* MW 3 */
+ 9024 "00101111" // /* MW 2 */
+ 9025 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9027 "00000001" // /* MW 3 */
+ 9028 "00011010" // /* MW 2 */
+ 9029 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9031 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9033 "00011100" // /* MW 7 */
+ 9034 "00000000" // /* MW 6 */
+ 9035 "00000000" // /* MW 5 */
+ 9036 "00000100" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9041 "00000000" // /* MW 5 */
+ 9042 "00000000" // /* MW 4 */
+ 9043 "11111000" // /* MW 3 */
+ 9044 "00010001" // /* MW 2 */
+ 9045 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9047 "00010000" // /* MW 9 */
+ 9048 "01101000" // /* MW 8 */
+ 9049 "10110010" // /* MW 7 */
+ 9050 "11110011" // /* MW 6 */
+ 9051 "00000001" // /* MW 5 */
+ 9052 "00000000" // /* MW 4 */
+ 9053 "00000000" // /* MW 3 */
+ 9054 "01001110" // /* MW 2 */
+ 9055 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9057 "00010000" // /* MW 9 */
+ 9058 "00100000" // /* MW 8 */
+ 9059 "00110010" // /* MW 7 */
+ 9060 "11110001" // /* MW 6 */
+ 9061 "00000001" // /* MW 5 */
+ 9062 "00000000" // /* MW 4 */
+ 9063 "00000000" // /* MW 3 */
+ 9064 "00101111" // /* MW 2 */
+ 9065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9067 "00000001" // /* MW 3 */
+ 9068 "00011010" // /* MW 2 */
+ 9069 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9073 "00000000" // /* MW 15 */
+ 9074 "00000000" // /* MW 14 */
+ 9075 "01111000" // /* MW 13 */
+ 9076 "10100101" // /* MW 12 */
+ 9077 "00000001" // /* MW 11 */
+ 9078 "00000000" // /* MW 10 */
+ 9079 "00000000" // /* MW 9 */
+ 9080 "00000000" // /* MW 8 */
+ 9081 "01011011" // /* MW 7 */
+ 9082 "00000001" // /* MW 6 */
+ 9083 "00100000" // /* MW 5 */
+ 9084 "00000000" // /* MW 4 */
+ 9085 "11110000" // /* MW 3 */
+ 9086 "00101100" // /* MW 2 */
+ 9087 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+.src_ref 8 "superkernels.cpp" 532 27
+.src_ref 8 "superkernels.cpp" 533 31
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+ 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9089 "00010000" // /* MW 9 */
+ 9090 "01110010" // /* MW 8 */
+ 9091 "00110010" // /* MW 7 */
+ 9092 "11110011" // /* MW 6 */
+ 9093 "00000001" // /* MW 5 */
+ 9094 "00000000" // /* MW 4 */
+ 9095 "00000000" // /* MW 3 */
+ 9096 "00001101" // /* MW 2 */
+ 9097 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 27 first
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 552 2
+ 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9099 "00010000" // /* MW 9 */
+ 9100 "00100000" // /* MW 8 */
+ 9101 "00110010" // /* MW 7 */
+ 9102 "11110001" // /* MW 6 */
+ 9103 "00000001" // /* MW 5 */
+ 9104 "00000000" // /* MW 4 */
+ 9105 "11010000" // /* MW 3 */
+ 9106 "11001010" // /* MW 2 */
+ 9107 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 533 46
+ 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9109 "00010000" // /* MW 9 */
+ 9110 "01101100" // /* MW 8 */
+ 9111 "00110010" // /* MW 7 */
+ 9112 "11110011" // /* MW 6 */
+ 9113 "00000001" // /* MW 5 */
+ 9114 "00000000" // /* MW 4 */
+ 9115 "11010000" // /* MW 3 */
+ 9116 "11000010" // /* MW 2 */
+ 9117 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 46 first
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9119 "00001010" // /* MW 5 */
+ 9120 "00111100" // /* MW 4 */
+ 9121 "11010000" // /* MW 3 */
+ 9122 "11000110" // /* MW 2 */
+ 9123 "11000000" // /* MW 1 */
+ 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9125 "00000000" // /* MW 1 */
+ 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9127 "00000000" // /* MW 1 */
+ 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9129 "00000000" // /* MW 1 */
+ 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9131 "00000000" // /* MW 1 */
+ 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9133 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 39 first
+ 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9135 "00001111" // /* MW 3 */
+ 9136 "10100101" // /* MW 2 */
+ 9137 "00010100" // /* MW 1 */
+ 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9139 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31 first
+.src_ref 8 "superkernels.cpp" 533 31 first
+ 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9141 "10000010" // /* MW 5 */
+ 9142 "00110010" // /* MW 4 */
+ 9143 "00111010" // /* MW 3 */
+ 9144 "11100100" // /* MW 2 */
+ 9145 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9147 "00011100" // /* MW 3 */
+ 9148 "00110111" // /* MW 2 */
+ 9149 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9151 "00000010" // /* MW 3 */
+ 9152 "11100111" // /* MW 2 */
+ 9153 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 44
+ 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "00011100" // /* MW 3 */
+ 9156 "10110111" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "00110010" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 67 first
+ 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00010001" // /* MW 3 */
+ 9164 "00100101" // /* MW 2 */
+ 9165 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 535 37 first
+ 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9167 "00001000" // /* MW 3 */
+ 9168 "01100001" // /* MW 2 */
+ 9169 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 9171 "00000001" // /* MW 5 */
+ 9172 "01000000" // /* MW 4 */
+ 9173 "01000000" // /* MW 3 */
+ 9174 "00010010" // /* MW 2 */
+ 9175 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33
+.delay_slot
+ 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9177 "00110000" // /* MW 5 */
+ 9178 "11001010" // /* MW 4 */
+ 9179 "11001100" // /* MW 3 */
+ 9180 "00000111" // /* MW 2 */
+ 9181 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33 first
+.delay_slot
+ 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9183 "01010001" // /* MW 3 */
+ 9184 "00000110" // /* MW 2 */
+ 9185 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9189 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9191 "00010000" // /* MW 9 */
+ 9192 "01101000" // /* MW 8 */
+ 9193 "10110010" // /* MW 7 */
+ 9194 "11110011" // /* MW 6 */
+ 9195 "00000001" // /* MW 5 */
+ 9196 "00000000" // /* MW 4 */
+ 9197 "11110000" // /* MW 3 */
+ 9198 "00101100" // /* MW 2 */
+ 9199 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 9201 "00100001" // /* MW 9 */
+ 9202 "00000000" // /* MW 8 */
+ 9203 "00000000" // /* MW 7 */
+ 9204 "10011000" // /* MW 6 */
+ 9205 "00000100" // /* MW 5 */
+ 9206 "00000000" // /* MW 4 */
+ 9207 "01100000" // /* MW 3 */
+ 9208 "10000001" // /* MW 2 */
+ 9209 "11010001" // /* MW 1 */
+.delay_slot
+ 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9211 "10010001" // /* MW 3 */
+ 9212 "11100101" // /* MW 2 */
+ 9213 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9221 "10000001" // /* MW 11 */
+ 9222 "10101101" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "00000000" // /* MW 8 */
+ 9225 "00000000" // /* MW 7 */
+ 9226 "00000000" // /* MW 6 */
+ 9227 "00100000" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+.src_ref 8 "superkernels.cpp" 541 26
+ 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10001000" // /* MW 5 */
+ 9234 "11001001" // /* MW 4 */
+ 9235 "11001100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 26 first
+.src_ref 8 "superkernels.cpp" 541 61
+ 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "00100010" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110011" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11001110" // /* MW 2 */
+ 9247 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 61
+.src_ref 8 "superkernels.cpp" 542 44
+ 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "01101110" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110011" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000010" // /* MW 2 */
+ 9257 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 44 first
+ 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9259 "01010110" // /* MW 3 */
+ 9260 "00000110" // /* MW 2 */
+ 9261 "00000110" // /* MW 1 */
+ 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9263 "00000000" // /* MW 1 */
+ 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9265 "00000000" // /* MW 1 */
+ 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9267 "00000000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 37 first
+ 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9273 "00001111" // /* MW 3 */
+ 9274 "11100111" // /* MW 2 */
+ 9275 "00010100" // /* MW 1 */
+ 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30 first
+.src_ref 8 "superkernels.cpp" 542 30 first
+ 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "10000010" // /* MW 5 */
+ 9280 "10110011" // /* MW 4 */
+ 9281 "00111010" // /* MW 3 */
+ 9282 "00100110" // /* MW 2 */
+ 9283 "10010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9285 "00101100" // /* MW 3 */
+ 9286 "01110111" // /* MW 2 */
+ 9287 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9289 "00000010" // /* MW 3 */
+ 9290 "00101001" // /* MW 2 */
+ 9291 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+.src_ref 8 "superkernels.cpp" 542 42
+ 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9293 "00000001" // /* MW 5 */
+ 9294 "10100000" // /* MW 4 */
+ 9295 "10011000" // /* MW 3 */
+ 9296 "11100101" // /* MW 2 */
+ 9297 "10011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9299 "01000010" // /* MW 3 */
+ 9300 "01100011" // /* MW 2 */
+ 9301 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 69 first
+ 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9303 "00010001" // /* MW 3 */
+ 9304 "00100101" // /* MW 2 */
+ 9305 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 544 38 first
+ 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9307 "00000111" // /* MW 3 */
+ 9308 "01100001" // /* MW 2 */
+ 9309 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */
+ 9311 "00000001" // /* MW 5 */
+ 9312 "01000000" // /* MW 4 */
+ 9313 "11100000" // /* MW 3 */
+ 9314 "00010011" // /* MW 2 */
+ 9315 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34
+.delay_slot
+ 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9317 "01000000" // /* MW 5 */
+ 9318 "11001010" // /* MW 4 */
+ 9319 "11001100" // /* MW 3 */
+ 9320 "00000111" // /* MW 2 */
+ 9321 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34 first
+.delay_slot
+ 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9323 "01010001" // /* MW 3 */
+ 9324 "00000110" // /* MW 2 */
+ 9325 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9327 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9329 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9331 "00011100" // /* MW 13 */
+ 9332 "00000000" // /* MW 12 */
+ 9333 "00000000" // /* MW 11 */
+ 9334 "01010111" // /* MW 10 */
+ 9335 "00011010" // /* MW 9 */
+ 9336 "01000000" // /* MW 8 */
+ 9337 "00000000" // /* MW 7 */
+ 9338 "00000000" // /* MW 6 */
+ 9339 "10110110" // /* MW 5 */
+ 9340 "00000010" // /* MW 4 */
+ 9341 "11110000" // /* MW 3 */
+ 9342 "00101100" // /* MW 2 */
+ 9343 "00000000" // /* MW 1 */
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9345 "01011000" // /* MW 11 */
+ 9346 "00000001" // /* MW 10 */
+ 9347 "11101000" // /* MW 9 */
+ 9348 "01001001" // /* MW 8 */
+ 9349 "11100000" // /* MW 7 */
+ 9350 "00000000" // /* MW 6 */
+ 9351 "00001011" // /* MW 5 */
+ 9352 "10001100" // /* MW 4 */
+ 9353 "00100110" // /* MW 3 */
+ 9354 "10000011" // /* MW 2 */
+ 9355 "11110111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9357 "10011001" // /* MW 3 */
+ 9358 "10111100" // /* MW 2 */
+ 9359 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9361 "10010001" // /* MW 3 */
+ 9362 "11100101" // /* MW 2 */
+ 9363 "00000111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11 first
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */
+ 9365 "00000001" // /* MW 5 */
+ 9366 "00000000" // /* MW 4 */
+ 9367 "00101000" // /* MW 3 */
+ 9368 "00001000" // /* MW 2 */
+ 9369 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9371 "11000000" // /* MW 3 */
+ 9372 "01100000" // /* MW 2 */
+ 9373 "00011111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9375 "00000001" // /* MW 3 */
+ 9376 "00011010" // /* MW 2 */
+ 9377 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11
+.delay_slot
+ 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9383 "00010000" // /* MW 9 */
+ 9384 "10000000" // /* MW 8 */
+ 9385 "00110010" // /* MW 7 */
+ 9386 "11110001" // /* MW 6 */
+ 9387 "00000001" // /* MW 5 */
+ 9388 "00000000" // /* MW 4 */
+ 9389 "11110000" // /* MW 3 */
+ 9390 "00101100" // /* MW 2 */
+ 9391 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 552 2
+.return_address
+ 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9393 "00010001" // /* MW 9 */
+ 9394 "00100000" // /* MW 8 */
+ 9395 "00110010" // /* MW 7 */
+ 9396 "11110001" // /* MW 6 */
+ 9397 "00000001" // /* MW 5 */
+ 9398 "00000000" // /* MW 4 */
+ 9399 "01100000" // /* MW 3 */
+ 9400 "10010001" // /* MW 2 */
+ 9401 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+ 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9403 "10100000" // /* MW 5 */
+ 9404 "11001001" // /* MW 4 */
+ 9405 "11001110" // /* MW 3 */
+ 9406 "00000111" // /* MW 2 */
+ 9407 "00000000" // /* MW 1 */
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9409 "10011110" // /* MW 3 */
+ 9410 "01011100" // /* MW 2 */
+ 9411 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2 first
+.no_stack_arguments
+ 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */
+ 9413 "00000001" // /* MW 5 */
+ 9414 "00000000" // /* MW 4 */
+ 9415 "01111000" // /* MW 3 */
+ 9416 "00001001" // /* MW 2 */
+ 9417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9421 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9427 "00011100" // /* MW 13 */
+ 9428 "00000000" // /* MW 12 */
+ 9429 "00000000" // /* MW 11 */
+ 9430 "01010111" // /* MW 10 */
+ 9431 "00011010" // /* MW 9 */
+ 9432 "01000000" // /* MW 8 */
+ 9433 "00000000" // /* MW 7 */
+ 9434 "00000000" // /* MW 6 */
+ 9435 "10110110" // /* MW 5 */
+ 9436 "00000010" // /* MW 4 */
+ 9437 "11110000" // /* MW 3 */
+ 9438 "00101100" // /* MW 2 */
+ 9439 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7 first
+.return_address
+ 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9441 "00010110" // /* MW 3 */
+ 9442 "00000110" // /* MW 2 */
+ 9443 "00000111" // /* MW 1 */
+ 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9445 "00000000" // /* MW 1 */
+ 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9447 "00000000" // /* MW 1 */
+ 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9449 "00000000" // /* MW 1 */
+ 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9451 "00000000" // /* MW 1 */
+ 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9453 "00000000" // /* MW 1 */
+ 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+ 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9457 "00001000" // /* MW 3 */
+ 9458 "11100011" // /* MW 2 */
+ 9459 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 25
+ 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "01000000" // /* MW 4 */
+ 9463 "11100000" // /* MW 3 */
+ 9464 "00010010" // /* MW 2 */
+ 9465 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 555 15
+ 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9477 "10001000" // /* MW 5 */
+ 9478 "11001001" // /* MW 4 */
+ 9479 "11001110" // /* MW 3 */
+ 9480 "00000111" // /* MW 2 */
+ 9481 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 67
+ 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9483 "00010000" // /* MW 9 */
+ 9484 "00110000" // /* MW 8 */
+ 9485 "00110010" // /* MW 7 */
+ 9486 "11110001" // /* MW 6 */
+ 9487 "00000001" // /* MW 5 */
+ 9488 "00000000" // /* MW 4 */
+ 9489 "11010000" // /* MW 3 */
+ 9490 "11000010" // /* MW 2 */
+ 9491 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 67
+ 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9493 "00111010" // /* MW 3 */
+ 9494 "00000100" // /* MW 2 */
+ 9495 "00000010" // /* MW 1 */
+ 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9497 "00000000" // /* MW 1 */
+ 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9499 "00000000" // /* MW 1 */
+ 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9501 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.no_stack_arguments
+ 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9503 "00000001" // /* MW 5 */
+ 9504 "00000000" // /* MW 4 */
+ 9505 "11111000" // /* MW 3 */
+ 9506 "00010011" // /* MW 2 */
+ 9507 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.delay_slot
+ 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00000111" // /* MW 3 */
+ 9512 "00100000" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9515 "10110101" // /* MW 5 */
+ 9516 "01101101" // /* MW 4 */
+ 9517 "00111000" // /* MW 3 */
+ 9518 "11000010" // /* MW 2 */
+ 9519 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9521 "01000001" // /* MW 5 */
+ 9522 "10111011" // /* MW 4 */
+ 9523 "00110111" // /* MW 3 */
+ 9524 "01100000" // /* MW 2 */
+ 9525 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9527 "00010010" // /* MW 9 */
+ 9528 "00000001" // /* MW 8 */
+ 9529 "00000100" // /* MW 7 */
+ 9530 "00000000" // /* MW 6 */
+ 9531 "01011011" // /* MW 5 */
+ 9532 "00000001" // /* MW 4 */
+ 9533 "11110000" // /* MW 3 */
+ 9534 "00101100" // /* MW 2 */
+ 9535 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9537 "01111000" // /* MW 9 */
+ 9538 "11010000" // /* MW 8 */
+ 9539 "01101011" // /* MW 7 */
+ 9540 "10001111" // /* MW 6 */
+ 9541 "00000001" // /* MW 5 */
+ 9542 "00011011" // /* MW 4 */
+ 9543 "00100000" // /* MW 3 */
+ 9544 "10100011" // /* MW 2 */
+ 9545 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+ 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9547 "00000010" // /* MW 3 */
+ 9548 "11100001" // /* MW 2 */
+ 9549 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 6
+.src_ref 8 "superkernels.cpp" 554 78
+ 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */
+ 9551 "00000001" // /* MW 5 */
+ 9552 "01000000" // /* MW 4 */
+ 9553 "11010000" // /* MW 3 */
+ 9554 "00010010" // /* MW 2 */
+ 9555 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00000101" // /* MW 3 */
+ 9558 "00011110" // /* MW 2 */
+ 9559 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9567 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 555 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9569 "01100011" // /* MW 5 */
+ 9570 "00001011" // /* MW 4 */
+ 9571 "11011110" // /* MW 3 */
+ 9572 "11000010" // /* MW 2 */
+ 9573 "01001010" // /* MW 1 */
+ 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9575 "00000000" // /* MW 1 */
+ 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9577 "00000000" // /* MW 1 */
+ 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9579 "00000000" // /* MW 1 */
+ 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9581 "00000000" // /* MW 1 */
+ 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9583 "00000000" // /* MW 1 */
+ 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9585 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9587 "11111000" // /* MW 3 */
+ 9588 "00010000" // /* MW 2 */
+ 9589 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 7
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9591 "00010000" // /* MW 9 */
+ 9592 "01101000" // /* MW 8 */
+ 9593 "10110010" // /* MW 7 */
+ 9594 "11110011" // /* MW 6 */
+ 9595 "00000001" // /* MW 5 */
+ 9596 "00000000" // /* MW 4 */
+ 9597 "11010000" // /* MW 3 */
+ 9598 "11000010" // /* MW 2 */
+ 9599 "11011100" // /* MW 1 */
+ 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9601 "00000000" // /* MW 1 */
+ 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9603 "00000000" // /* MW 1 */
+ 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */
+ 9605 "00000000" // /* MW 5 */
+ 9606 "00000000" // /* MW 4 */
+ 9607 "11011000" // /* MW 3 */
+ 9608 "00010010" // /* MW 2 */
+ 9609 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9617 "00000001" // /* MW 3 */
+ 9618 "11100001" // /* MW 2 */
+ 9619 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.delay_slot
+ 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9621 "11000001" // /* MW 11 */
+ 9622 "00001000" // /* MW 10 */
+ 9623 "01110011" // /* MW 9 */
+ 9624 "00000011" // /* MW 8 */
+ 9625 "00000000" // /* MW 7 */
+ 9626 "00000000" // /* MW 6 */
+ 9627 "00100000" // /* MW 5 */
+ 9628 "00000000" // /* MW 4 */
+ 9629 "11110000" // /* MW 3 */
+ 9630 "00101100" // /* MW 2 */
+ 9631 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+.src_ref 8 "superkernels.cpp" 558 7
+ 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9633 "00000000" // /* MW 15 */
+ 9634 "00000000" // /* MW 14 */
+ 9635 "00010000" // /* MW 13 */
+ 9636 "01101000" // /* MW 12 */
+ 9637 "10110010" // /* MW 11 */
+ 9638 "11110011" // /* MW 10 */
+ 9639 "00000001" // /* MW 9 */
+ 9640 "00000000" // /* MW 8 */
+ 9641 "01011011" // /* MW 7 */
+ 9642 "00000001" // /* MW 6 */
+ 9643 "00100000" // /* MW 5 */
+ 9644 "00000000" // /* MW 4 */
+ 9645 "11110000" // /* MW 3 */
+ 9646 "00101100" // /* MW 2 */
+ 9647 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+.src_ref 8 "superkernels.cpp" 558 7 first
+ 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9649 "00000000" // /* MW 15 */
+ 9650 "00000000" // /* MW 14 */
+ 9651 "01111000" // /* MW 13 */
+ 9652 "10100101" // /* MW 12 */
+ 9653 "00000001" // /* MW 11 */
+ 9654 "00000000" // /* MW 10 */
+ 9655 "00000000" // /* MW 9 */
+ 9656 "00000000" // /* MW 8 */
+ 9657 "01011011" // /* MW 7 */
+ 9658 "00000001" // /* MW 6 */
+ 9659 "00100000" // /* MW 5 */
+ 9660 "00000000" // /* MW 4 */
+ 9661 "11010000" // /* MW 3 */
+ 9662 "11000010" // /* MW 2 */
+ 9663 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+.src_ref 8 "superkernels.cpp" 558 43
+ 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00000001" // /* MW 3 */
+ 9666 "00100010" // /* MW 2 */
+ 9667 "00010000" // /* MW 1 */
+ 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9669 "00000000" // /* MW 1 */
+ 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9671 "00000000" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 19
+ 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9679 "00001000" // /* MW 3 */
+ 9680 "10100001" // /* MW 2 */
+ 9681 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 25
+ 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */
+ 9683 "00000001" // /* MW 5 */
+ 9684 "01000000" // /* MW 4 */
+ 9685 "01001000" // /* MW 3 */
+ 9686 "00010011" // /* MW 2 */
+ 9687 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 559 15
+.delay_slot
+ 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9689 "11000000" // /* MW 5 */
+ 9690 "11001001" // /* MW 4 */
+ 9691 "11001110" // /* MW 3 */
+ 9692 "00000111" // /* MW 2 */
+ 9693 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+.delay_slot
+ 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9695 "11000000" // /* MW 5 */
+ 9696 "11001000" // /* MW 4 */
+ 9697 "11000100" // /* MW 3 */
+ 9698 "00000111" // /* MW 2 */
+ 9699 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9701 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9703 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9705 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+ 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00010110" // /* MW 3 */
+ 9708 "00000110" // /* MW 2 */
+ 9709 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+ 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "00111010" // /* MW 3 */
+ 9712 "00000100" // /* MW 2 */
+ 9713 "00000010" // /* MW 1 */
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9715 "00000000" // /* MW 1 */
+ 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9717 "00000000" // /* MW 1 */
+ 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9719 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.no_stack_arguments
+ 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9721 "00000001" // /* MW 5 */
+ 9722 "00000000" // /* MW 4 */
+ 9723 "11111000" // /* MW 3 */
+ 9724 "00010011" // /* MW 2 */
+ 9725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9727 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.delay_slot
+ 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9729 "00000111" // /* MW 3 */
+ 9730 "00100000" // /* MW 2 */
+ 9731 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9733 "00110101" // /* MW 5 */
+ 9734 "01101110" // /* MW 4 */
+ 9735 "00111000" // /* MW 3 */
+ 9736 "11000010" // /* MW 2 */
+ 9737 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9739 "01000001" // /* MW 5 */
+ 9740 "00111011" // /* MW 4 */
+ 9741 "00110111" // /* MW 3 */
+ 9742 "01100000" // /* MW 2 */
+ 9743 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9745 "00000000" // /* MW 15 */
+ 9746 "00000000" // /* MW 14 */
+ 9747 "01111000" // /* MW 13 */
+ 9748 "10100101" // /* MW 12 */
+ 9749 "00000001" // /* MW 11 */
+ 9750 "10010000" // /* MW 10 */
+ 9751 "00001000" // /* MW 9 */
+ 9752 "00100000" // /* MW 8 */
+ 9753 "01011011" // /* MW 7 */
+ 9754 "00000001" // /* MW 6 */
+ 9755 "00100000" // /* MW 5 */
+ 9756 "00000000" // /* MW 4 */
+ 9757 "11110000" // /* MW 3 */
+ 9758 "00101100" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9761 "01111000" // /* MW 9 */
+ 9762 "10010000" // /* MW 8 */
+ 9763 "01101011" // /* MW 7 */
+ 9764 "10001111" // /* MW 6 */
+ 9765 "00000001" // /* MW 5 */
+ 9766 "00011011" // /* MW 4 */
+ 9767 "00100000" // /* MW 3 */
+ 9768 "10010011" // /* MW 2 */
+ 9769 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+ 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9771 "00000010" // /* MW 3 */
+ 9772 "11100001" // /* MW 2 */
+ 9773 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 6
+.src_ref 8 "superkernels.cpp" 558 78
+ 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */
+ 9775 "00000001" // /* MW 5 */
+ 9776 "01000000" // /* MW 4 */
+ 9777 "00111000" // /* MW 3 */
+ 9778 "00010011" // /* MW 2 */
+ 9779 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 7
+.delay_slot
+ 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9781 "10100000" // /* MW 5 */
+ 9782 "11001001" // /* MW 4 */
+ 9783 "11000100" // /* MW 3 */
+ 9784 "00000111" // /* MW 2 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9789 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9791 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9793 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 559 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9795 "01100011" // /* MW 5 */
+ 9796 "00001011" // /* MW 4 */
+ 9797 "11011110" // /* MW 3 */
+ 9798 "11000010" // /* MW 2 */
+ 9799 "00101010" // /* MW 1 */
+ 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9801 "00000000" // /* MW 1 */
+ 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9803 "00000000" // /* MW 1 */
+ 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9805 "00000000" // /* MW 1 */
+ 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9807 "00000000" // /* MW 1 */
+ 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9809 "00000000" // /* MW 1 */
+ 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "11111000" // /* MW 3 */
+ 9814 "00010000" // /* MW 2 */
+ 9815 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "00010110" // /* MW 3 */
+ 9818 "11100110" // /* MW 2 */
+ 9819 "00000110" // /* MW 1 */
+ 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9821 "00000000" // /* MW 1 */
+ 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9823 "00000000" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00000001" // /* MW 3 */
+ 9834 "11100001" // /* MW 2 */
+ 9835 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9837 "00010001" // /* MW 3 */
+ 9838 "11100110" // /* MW 2 */
+ 9839 "00001110" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */
+ 9841 "00000000" // /* MW 5 */
+ 9842 "00000000" // /* MW 4 */
+ 9843 "01010000" // /* MW 3 */
+ 9844 "00010011" // /* MW 2 */
+ 9845 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9847 "11000000" // /* MW 3 */
+ 9848 "01100010" // /* MW 2 */
+ 9849 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9857 "00000000" // /* MW 15 */
+ 9858 "00000000" // /* MW 14 */
+ 9859 "01111000" // /* MW 13 */
+ 9860 "10100101" // /* MW 12 */
+ 9861 "00000001" // /* MW 11 */
+ 9862 "00000000" // /* MW 10 */
+ 9863 "00000000" // /* MW 9 */
+ 9864 "00000000" // /* MW 8 */
+ 9865 "01011011" // /* MW 7 */
+ 9866 "00000001" // /* MW 6 */
+ 9867 "00100000" // /* MW 5 */
+ 9868 "00000000" // /* MW 4 */
+ 9869 "11110000" // /* MW 3 */
+ 9870 "00101100" // /* MW 2 */
+ 9871 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+.src_ref 8 "superkernels.cpp" 562 7
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9873 "00000000" // /* MW 15 */
+ 9874 "00000000" // /* MW 14 */
+ 9875 "00010000" // /* MW 13 */
+ 9876 "01101000" // /* MW 12 */
+ 9877 "00110010" // /* MW 11 */
+ 9878 "11110001" // /* MW 10 */
+ 9879 "00000001" // /* MW 9 */
+ 9880 "00000000" // /* MW 8 */
+ 9881 "01011011" // /* MW 7 */
+ 9882 "00000001" // /* MW 6 */
+ 9883 "00100000" // /* MW 5 */
+ 9884 "00000000" // /* MW 4 */
+ 9885 "00100000" // /* MW 3 */
+ 9886 "11110011" // /* MW 2 */
+ 9887 "11111011" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+.src_ref 8 "superkernels.cpp" 562 7 first
+.src_ref 8 "superkernels.cpp" 562 19
+ 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00100010" // /* MW 5 */
+ 9890 "01000100" // /* MW 4 */
+ 9891 "11010000" // /* MW 3 */
+ 9892 "11000010" // /* MW 2 */
+ 9893 "01000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 19
+ 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9907 "00001000" // /* MW 3 */
+ 9908 "01100001" // /* MW 2 */
+ 9909 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 25
+ 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9911 "00000001" // /* MW 5 */
+ 9912 "01000000" // /* MW 4 */
+ 9913 "10101000" // /* MW 3 */
+ 9914 "00010011" // /* MW 2 */
+ 9915 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11001000" // /* MW 5 */
+ 9918 "11001001" // /* MW 4 */
+ 9919 "11000100" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9929 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 68
+ 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9931 "00010000" // /* MW 9 */
+ 9932 "00110000" // /* MW 8 */
+ 9933 "10110010" // /* MW 7 */
+ 9934 "11110000" // /* MW 6 */
+ 9935 "00000001" // /* MW 5 */
+ 9936 "00000000" // /* MW 4 */
+ 9937 "11010000" // /* MW 3 */
+ 9938 "11000010" // /* MW 2 */
+ 9939 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 68
+ 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9941 "00111010" // /* MW 3 */
+ 9942 "00000100" // /* MW 2 */
+ 9943 "00000001" // /* MW 1 */
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+ 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9947 "00000000" // /* MW 1 */
+ 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9949 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.no_stack_arguments
+ 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9951 "00000001" // /* MW 5 */
+ 9952 "00000000" // /* MW 4 */
+ 9953 "11111000" // /* MW 3 */
+ 9954 "00010011" // /* MW 2 */
+ 9955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9959 "00000111" // /* MW 3 */
+ 9960 "00100000" // /* MW 2 */
+ 9961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9963 "10110101" // /* MW 5 */
+ 9964 "01101101" // /* MW 4 */
+ 9965 "00111000" // /* MW 3 */
+ 9966 "11000010" // /* MW 2 */
+ 9967 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9969 "01000001" // /* MW 5 */
+ 9970 "00111011" // /* MW 4 */
+ 9971 "00110111" // /* MW 3 */
+ 9972 "01100000" // /* MW 2 */
+ 9973 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9975 "00010010" // /* MW 9 */
+ 9976 "00000001" // /* MW 8 */
+ 9977 "00000100" // /* MW 7 */
+ 9978 "00000000" // /* MW 6 */
+ 9979 "01011011" // /* MW 5 */
+ 9980 "00000001" // /* MW 4 */
+ 9981 "11110000" // /* MW 3 */
+ 9982 "00101100" // /* MW 2 */
+ 9983 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.return_address
+ 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9985 "01000001" // /* MW 5 */
+ 9986 "10101110" // /* MW 4 */
+ 9987 "00111101" // /* MW 3 */
+ 9988 "00000110" // /* MW 2 */
+ 9989 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+ 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9991 "00000010" // /* MW 3 */
+ 9992 "11100001" // /* MW 2 */
+ 9993 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 6
+.src_ref 8 "superkernels.cpp" 562 79
+ 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9995 "00000001" // /* MW 5 */
+ 9996 "01000000" // /* MW 4 */
+ 9997 "10101000" // /* MW 3 */
+ 9998 "00010011" // /* MW 2 */
+ 9999 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16
+.delay_slot
+ 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10001 "11001000" // /* MW 5 */
+ 10002 "11001001" // /* MW 4 */
+ 10003 "11000100" // /* MW 3 */
+ 10004 "00000111" // /* MW 2 */
+ 10005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10013 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "01100011" // /* MW 5 */
+ 10016 "00001011" // /* MW 4 */
+ 10017 "11010100" // /* MW 3 */
+ 10018 "11000010" // /* MW 2 */
+ 10019 "11101010" // /* MW 1 */
+ 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10021 "00000000" // /* MW 1 */
+ 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10023 "00000000" // /* MW 1 */
+ 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10025 "00000000" // /* MW 1 */
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10033 "11111000" // /* MW 3 */
+ 10034 "00010000" // /* MW 2 */
+ 10035 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10037 "00010110" // /* MW 3 */
+ 10038 "11100110" // /* MW 2 */
+ 10039 "00000110" // /* MW 1 */
+ 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10041 "00000000" // /* MW 1 */
+ 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10043 "00000000" // /* MW 1 */
+ 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10045 "00000000" // /* MW 1 */
+ 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10047 "00000000" // /* MW 1 */
+ 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10049 "00000000" // /* MW 1 */
+ 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10053 "00000001" // /* MW 3 */
+ 10054 "11100001" // /* MW 2 */
+ 10055 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10057 "01110000" // /* MW 7 */
+ 10058 "10100101" // /* MW 6 */
+ 10059 "00000001" // /* MW 5 */
+ 10060 "00000000" // /* MW 4 */
+ 10061 "00110000" // /* MW 3 */
+ 10062 "11000010" // /* MW 2 */
+ 10063 "11011100" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+.src_ref 8 "superkernels.cpp" 566 6
+.src_ref 8 "superkernels.cpp" 567 14
+ 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10065 "10000000" // /* MW 5 */
+ 10066 "11001001" // /* MW 4 */
+ 10067 "11001100" // /* MW 3 */
+ 10068 "00000111" // /* MW 2 */
+ 10069 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6 first
+.src_ref 8 "superkernels.cpp" 566 19
+ 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10071 "00010000" // /* MW 9 */
+ 10072 "01110100" // /* MW 8 */
+ 10073 "00110010" // /* MW 7 */
+ 10074 "11110001" // /* MW 6 */
+ 10075 "00000001" // /* MW 5 */
+ 10076 "00000000" // /* MW 4 */
+ 10077 "11010000" // /* MW 3 */
+ 10078 "11000010" // /* MW 2 */
+ 10079 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 19
+ 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10081 "00110110" // /* MW 3 */
+ 10082 "00000110" // /* MW 2 */
+ 10083 "00000010" // /* MW 1 */
+ 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10085 "00000000" // /* MW 1 */
+ 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10087 "00000000" // /* MW 1 */
+ 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10089 "00000000" // /* MW 1 */
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+ 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10093 "00000000" // /* MW 1 */
+ 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10095 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 16
+ 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10097 "00001000" // /* MW 3 */
+ 10098 "01100001" // /* MW 2 */
+ 10099 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6
+ 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */
+ 10101 "00000001" // /* MW 5 */
+ 10102 "01000000" // /* MW 4 */
+ 10103 "11001000" // /* MW 3 */
+ 10104 "00010011" // /* MW 2 */
+ 10105 "10000000" // /* MW 1 */
+.delay_slot
+ 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10107 "10011001" // /* MW 3 */
+ 10108 "11101111" // /* MW 2 */
+ 10109 "00000111" // /* MW 1 */
+.delay_slot
+ 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10111 "11110001" // /* MW 3 */
+ 10112 "11110001" // /* MW 2 */
+ 10113 "00000111" // /* MW 1 */
+.delay_slot
+ 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10115 "11010001" // /* MW 3 */
+ 10116 "11110101" // /* MW 2 */
+ 10117 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 567 14 first
+ 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100011" // /* MW 5 */
+ 10124 "00001011" // /* MW 4 */
+ 10125 "11111100" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10129 "01000001" // /* MW 5 */
+ 10130 "11101011" // /* MW 4 */
+ 10131 "00101110" // /* MW 3 */
+ 10132 "00101110" // /* MW 2 */
+ 10133 "11111111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10135 "10010001" // /* MW 3 */
+ 10136 "11111101" // /* MW 2 */
+ 10137 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10139 "10110001" // /* MW 3 */
+ 10140 "11101001" // /* MW 2 */
+ 10141 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10143 "00000000" // /* MW 3 */
+ 10144 "00101000" // /* MW 2 */
+ 10145 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10147 "00100000" // /* MW 3 */
+ 10148 "01100110" // /* MW 2 */
+ 10149 "00011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569
+.delay_slot
+ 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10151 "00000001" // /* MW 5 */
+ 10152 "00000000" // /* MW 4 */
+ 10153 "00000000" // /* MW 3 */
+ 10154 "11110000" // /* MW 2 */
+ 10155 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10161 "00000000" // /* MW 15 */
+ 10162 "00000000" // /* MW 14 */
+ 10163 "01111000" // /* MW 13 */
+ 10164 "10100101" // /* MW 12 */
+ 10165 "00000001" // /* MW 11 */
+ 10166 "00000000" // /* MW 10 */
+ 10167 "00000000" // /* MW 9 */
+ 10168 "00000000" // /* MW 8 */
+ 10169 "01011011" // /* MW 7 */
+ 10170 "00000001" // /* MW 6 */
+ 10171 "00100000" // /* MW 5 */
+ 10172 "00000000" // /* MW 4 */
+ 10173 "11110000" // /* MW 3 */
+ 10174 "00101100" // /* MW 2 */
+ 10175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 10177 "00100000" // /* MW 11 */
+ 10178 "00000000" // /* MW 10 */
+ 10179 "00000000" // /* MW 9 */
+ 10180 "10011000" // /* MW 8 */
+ 10181 "00000100" // /* MW 7 */
+ 10182 "00000000" // /* MW 6 */
+ 10183 "00001011" // /* MW 5 */
+ 10184 "10001100" // /* MW 4 */
+ 10185 "00000110" // /* MW 3 */
+ 10186 "00001101" // /* MW 2 */
+ 10187 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10189 "00001001" // /* MW 5 */
+ 10190 "00100000" // /* MW 4 */
+ 10191 "10100111" // /* MW 3 */
+ 10192 "11000000" // /* MW 2 */
+ 10193 "00000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+ 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10195 "10000000" // /* MW 5 */
+ 10196 "11001000" // /* MW 4 */
+ 10197 "11000100" // /* MW 3 */
+ 10198 "00000111" // /* MW 2 */
+ 10199 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10201 "10100000" // /* MW 5 */
+ 10202 "11001001" // /* MW 4 */
+ 10203 "11001110" // /* MW 3 */
+ 10204 "00000111" // /* MW 2 */
+ 10205 "00000000" // /* MW 1 */
+.delay_slot
+ 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10207 "10010001" // /* MW 3 */
+ 10208 "11100101" // /* MW 2 */
+ 10209 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 115 4 first
+.function_start
+ 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10225 "01000001" // /* MW 5 */
+ 10226 "10100000" // /* MW 4 */
+ 10227 "00101111" // /* MW 3 */
+ 10228 "11000000" // /* MW 2 */
+ 10229 "00000000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10231 "00011100" // /* MW 3 */
+ 10232 "11000110" // /* MW 2 */
+ 10233 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10235 "00011100" // /* MW 3 */
+ 10236 "11000110" // /* MW 2 */
+ 10237 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10239 "00011100" // /* MW 3 */
+ 10240 "11000110" // /* MW 2 */
+ 10241 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10243 "00011100" // /* MW 3 */
+ 10244 "11000110" // /* MW 2 */
+ 10245 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10247 "00011100" // /* MW 3 */
+ 10248 "11000110" // /* MW 2 */
+ 10249 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10251 "00011100" // /* MW 3 */
+ 10252 "11000110" // /* MW 2 */
+ 10253 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10255 "00011100" // /* MW 3 */
+ 10256 "11000110" // /* MW 2 */
+ 10257 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10259 "00011100" // /* MW 3 */
+ 10260 "11000110" // /* MW 2 */
+ 10261 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10263 "00011100" // /* MW 3 */
+ 10264 "11000110" // /* MW 2 */
+ 10265 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10267 "00011100" // /* MW 3 */
+ 10268 "11000110" // /* MW 2 */
+ 10269 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10271 "00011100" // /* MW 3 */
+ 10272 "11000110" // /* MW 2 */
+ 10273 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10275 "00011100" // /* MW 3 */
+ 10276 "11000110" // /* MW 2 */
+ 10277 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10279 "00011100" // /* MW 3 */
+ 10280 "11000110" // /* MW 2 */
+ 10281 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10283 "00011100" // /* MW 3 */
+ 10284 "11000110" // /* MW 2 */
+ 10285 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10287 "00011100" // /* MW 3 */
+ 10288 "11000110" // /* MW 2 */
+ 10289 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10291 "00011100" // /* MW 3 */
+ 10292 "11000110" // /* MW 2 */
+ 10293 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10295 "00011100" // /* MW 3 */
+ 10296 "11000110" // /* MW 2 */
+ 10297 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10299 "00011100" // /* MW 3 */
+ 10300 "11000110" // /* MW 2 */
+ 10301 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10303 "00011100" // /* MW 3 */
+ 10304 "11000110" // /* MW 2 */
+ 10305 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10307 "00011100" // /* MW 3 */
+ 10308 "11000110" // /* MW 2 */
+ 10309 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "00011100" // /* MW 3 */
+ 10312 "11000110" // /* MW 2 */
+ 10313 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10315 "00011100" // /* MW 3 */
+ 10316 "11000110" // /* MW 2 */
+ 10317 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10319 "00011100" // /* MW 3 */
+ 10320 "11000110" // /* MW 2 */
+ 10321 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10323 "00011100" // /* MW 3 */
+ 10324 "11000110" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "00011100" // /* MW 3 */
+ 10328 "11000110" // /* MW 2 */
+ 10329 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "00011100" // /* MW 3 */
+ 10332 "11000110" // /* MW 2 */
+ 10333 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10335 "00011100" // /* MW 3 */
+ 10336 "11000110" // /* MW 2 */
+ 10337 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10339 "00011100" // /* MW 3 */
+ 10340 "11000110" // /* MW 2 */
+ 10341 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 119 first
+ 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10343 "00000000" // /* MW 3 */
+ 10344 "00101000" // /* MW 2 */
+ 10345 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19 first
+.delay_slot
+ 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10347 "00011100" // /* MW 3 */
+ 10348 "11000110" // /* MW 2 */
+ 10349 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10351 "00011100" // /* MW 3 */
+ 10352 "11000110" // /* MW 2 */
+ 10353 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10355 "00011100" // /* MW 3 */
+ 10356 "11000110" // /* MW 2 */
+ 10357 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10359 "00011100" // /* MW 3 */
+ 10360 "11000110" // /* MW 2 */
+ 10361 "00010000" // /* MW 1 */
+.delay_slot
+ 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10363 "10100000" // /* MW 3 */
+ 10364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 10365 "00011000" // /* MW 1 */
+.label _ZL19propagateFloat32NaNjj
+.function propagateFloat32NaN _ZL19propagateFloat32NaNjj
+.src_ref 10 "softfloat-specialize" 78 24
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 143 4 first
+.function_start
+ 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10369 "00010000" // /* MW 9 */
+ 10370 "00000000" // /* MW 8 */
+ 10371 "01001000" // /* MW 7 */
+ 10372 "00000010" // /* MW 6 */
+ 10373 "11000000" // /* MW 5 */
+ 10374 "00111111" // /* MW 4 */
+ 10375 "00000000" // /* MW 3 */
+ 10376 "01000011" // /* MW 2 */
+ 10377 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6
+.src_ref 10 "softfloat-specialize" 141 6
+ 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10379 "00010000" // /* MW 9 */
+ 10380 "00000000" // /* MW 8 */
+ 10381 "00001000" // /* MW 7 */
+ 10382 "00000000" // /* MW 6 */
+ 10383 "00010000" // /* MW 5 */
+ 10384 "00000000" // /* MW 4 */
+ 10385 "00000000" // /* MW 3 */
+ 10386 "11100111" // /* MW 2 */
+ 10387 "00111111" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6 first
+ 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10389 "01011000" // /* MW 9 */
+ 10390 "11111110" // /* MW 8 */
+ 10391 "10101001" // /* MW 7 */
+ 10392 "00101100" // /* MW 6 */
+ 10393 "01000000" // /* MW 5 */
+ 10394 "00000010" // /* MW 4 */
+ 10395 "00000000" // /* MW 3 */
+ 10396 "00110000" // /* MW 2 */
+ 10397 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 141 6 first
+ 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10399 "00000101" // /* MW 3 */
+ 10400 "10000000" // /* MW 2 */
+ 10401 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10403 "00111101" // /* MW 3 */
+ 10404 "01001100" // /* MW 2 */
+ 10405 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10407 "00111101" // /* MW 3 */
+ 10408 "10000110" // /* MW 2 */
+ 10409 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10411 "00110100" // /* MW 3 */
+ 10412 "11000110" // /* MW 2 */
+ 10413 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10415 "01100100" // /* MW 3 */
+ 10416 "11001100" // /* MW 2 */
+ 10417 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10419 "01100111" // /* MW 3 */
+ 10420 "01001100" // /* MW 2 */
+ 10421 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38 first
+ 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10423 "00001101" // /* MW 3 */
+ 10424 "10100011" // /* MW 2 */
+ 10425 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 24
+ 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10427 "00011100" // /* MW 3 */
+ 10428 "10110111" // /* MW 2 */
+ 10429 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 62 first
+ 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10431 "00000010" // /* MW 3 */
+ 10432 "00100010" // /* MW 2 */
+ 10433 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+ 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10435 "11111110" // /* MW 5 */
+ 10436 "00111111" // /* MW 4 */
+ 10437 "11111000" // /* MW 3 */
+ 10438 "00111111" // /* MW 2 */
+ 10439 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10441 "00000100" // /* MW 3 */
+ 10442 "10000101" // /* MW 2 */
+ 10443 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10445 "11110000" // /* MW 3 */
+ 10446 "10000100" // /* MW 2 */
+ 10447 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10449 "00000100" // /* MW 3 */
+ 10450 "01000011" // /* MW 2 */
+ 10451 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10453 "11110000" // /* MW 3 */
+ 10454 "01000010" // /* MW 2 */
+ 10455 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 4 first
+ 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10457 "00000000" // /* MW 3 */
+ 10458 "00101000" // /* MW 2 */
+ 10459 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+.delay_slot
+ 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10461 "01100100" // /* MW 3 */
+ 10462 "01110110" // /* MW 2 */
+ 10463 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10465 "01010111" // /* MW 3 */
+ 10466 "11000010" // /* MW 2 */
+ 10467 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 49 first
+.delay_slot
+ 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10469 "01000010" // /* MW 3 */
+ 10470 "01000110" // /* MW 2 */
+ 10471 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10473 "00100100" // /* MW 3 */
+ 10474 "01110110" // /* MW 2 */
+ 10475 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 27 first
+.delay_slot
+ 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10477 "00000010" // /* MW 3 */
+ 10478 "11000000" // /* MW 2 */
+.label _ZL19propagateFloat32NaNjj__end
+ 10479 "00010000" // /* MW 1 */
+.label _ZL19roundAndPackFloat32iij
+.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 154 first
+.src_ref 10 "softfloat.c" 161 19
+.src_ref 10 "softfloat.c" 203 30
+.function_start
+ 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10481 "00010000" // /* MW 9 */
+ 10482 "01111010" // /* MW 8 */
+ 10483 "00110010" // /* MW 7 */
+ 10484 "11110000" // /* MW 6 */
+ 10485 "00000001" // /* MW 5 */
+ 10486 "00000000" // /* MW 4 */
+ 10487 "00000000" // /* MW 3 */
+ 10488 "00000000" // /* MW 2 */
+ 10489 "00001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 161 19 first
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 178 21
+.src_ref 10 "softfloat.c" 194 29
+ 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10491 "11111010" // /* MW 5 */
+ 10492 "10011001" // /* MW 4 */
+ 10493 "11010000" // /* MW 3 */
+ 10494 "10010010" // /* MW 2 */
+ 10495 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10501 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10503 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10505 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10507 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 162 36 first
+.src_ref 10 "softfloat.c" 164 4 first
+ 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */
+ 10509 "00000001" // /* MW 5 */
+ 10510 "00000000" // /* MW 4 */
+ 10511 "10101000" // /* MW 3 */
+ 10512 "00010100" // /* MW 2 */
+ 10513 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 40
+.src_ref 10 "softfloat.c" 185 68
+.src_ref 10 "softfloat.c" 202 18
+.delay_slot
+ 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10515 "00000001" // /* MW 3 */
+ 10516 "01001010" // /* MW 2 */
+ 10517 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10525 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 171 34
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 174 34
+ 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10527 "01011000" // /* MW 9 */
+ 10528 "00000000" // /* MW 8 */
+ 10529 "00001000" // /* MW 7 */
+ 10530 "01001011" // /* MW 6 */
+ 10531 "01110000" // /* MW 5 */
+ 10532 "00000000" // /* MW 4 */
+ 10533 "00000000" // /* MW 3 */
+ 10534 "01110000" // /* MW 2 */
+ 10535 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26
+.src_ref 10 "softfloat.c" 171 34 first
+ 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10537 "00000101" // /* MW 5 */
+ 10538 "10100000" // /* MW 4 */
+ 10539 "11110010" // /* MW 3 */
+ 10540 "11001000" // /* MW 2 */
+ 10541 "00111110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 171 16
+ 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10543 "10000010" // /* MW 3 */
+ 10544 "10001111" // /* MW 2 */
+ 10545 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 174 34 first
+ 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10547 "00000111" // /* MW 3 */
+ 10548 "00110111" // /* MW 2 */
+ 10549 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12
+.src_ref 10 "softfloat.c" 174 16
+ 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10551 "01000001" // /* MW 5 */
+ 10552 "10100001" // /* MW 4 */
+ 10553 "01001101" // /* MW 3 */
+ 10554 "00110000" // /* MW 2 */
+ 10555 "00110100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12 first
+.src_ref 10 "softfloat.c" 170 12 first
+ 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10557 "01110010" // /* MW 3 */
+ 10558 "00001110" // /* MW 2 */
+ 10559 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26 first
+ 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10561 "01000111" // /* MW 3 */
+ 10562 "01110110" // /* MW 2 */
+ 10563 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+ 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10565 "10000001" // /* MW 11 */
+ 10566 "10101101" // /* MW 10 */
+ 10567 "00000000" // /* MW 9 */
+ 10568 "00010000" // /* MW 8 */
+ 10569 "01011100" // /* MW 7 */
+ 10570 "00001110" // /* MW 6 */
+ 10571 "00100000" // /* MW 5 */
+ 10572 "00000000" // /* MW 4 */
+ 10573 "11110000" // /* MW 3 */
+ 10574 "00101100" // /* MW 2 */
+ 10575 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+.src_ref 10 "softfloat.c" 179 14
+.src_ref 10 "softfloat.c" 179 17 first
+.src_ref 10 "softfloat.c" 180 23
+.src_ref 10 "softfloat.c" 181 28
+ 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10577 "11110101" // /* MW 5 */
+ 10578 "00100011" // /* MW 4 */
+ 10579 "00001000" // /* MW 3 */
+ 10580 "10010110" // /* MW 2 */
+ 10581 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 14
+ 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10583 "00001010" // /* MW 3 */
+ 10584 "10100101" // /* MW 2 */
+ 10585 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 4
+ 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */
+ 10587 "00000001" // /* MW 5 */
+ 10588 "01000000" // /* MW 4 */
+ 10589 "00001000" // /* MW 3 */
+ 10590 "00010101" // /* MW 2 */
+ 10591 "10010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 178 21 first
+.delay_slot
+ 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10593 "01100100" // /* MW 3 */
+ 10594 "11100010" // /* MW 2 */
+ 10595 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.delay_slot
+ 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10597 "01111101" // /* MW 3 */
+ 10598 "00001110" // /* MW 2 */
+ 10599 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10601 "01111101" // /* MW 3 */
+ 10602 "01000010" // /* MW 2 */
+ 10603 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10607 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 28 first
+.src_ref 10 "softfloat.c" 182 40 first
+.src_ref 10 "softfloat.c" 182 59
+ 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10609 "10101000" // /* MW 9 */
+ 10610 "11001010" // /* MW 8 */
+ 10611 "10001000" // /* MW 7 */
+ 10612 "00111110" // /* MW 6 */
+ 10613 "00111000" // /* MW 5 */
+ 10614 "00000101" // /* MW 4 */
+ 10615 "00000000" // /* MW 3 */
+ 10616 "00010010" // /* MW 2 */
+ 10617 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 59
+ 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10619 "00101010" // /* MW 3 */
+ 10620 "00101001" // /* MW 2 */
+ 10621 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 23 first
+ 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10623 "00101010" // /* MW 3 */
+ 10624 "00100000" // /* MW 2 */
+ 10625 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 18 first
+ 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10627 "01000100" // /* MW 3 */
+ 10628 "11100111" // /* MW 2 */
+ 10629 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 13 first
+ 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10631 "00000101" // /* MW 3 */
+ 10632 "11100111" // /* MW 2 */
+ 10633 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 8 first
+ 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */
+ 10635 "00000001" // /* MW 5 */
+ 10636 "01000000" // /* MW 4 */
+ 10637 "00110000" // /* MW 3 */
+ 10638 "00010101" // /* MW 2 */
+ 10639 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 187 18
+.src_ref 10 "softfloat.c" 192 39
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10641 "00000001" // /* MW 3 */
+ 10642 "00100000" // /* MW 2 */
+ 10643 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10651 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 18 first
+ 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10653 "00001001" // /* MW 3 */
+ 10654 "10100111" // /* MW 2 */
+ 10655 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 8
+ 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */
+ 10657 "00000001" // /* MW 5 */
+ 10658 "01000000" // /* MW 4 */
+ 10659 "00010000" // /* MW 3 */
+ 10660 "00010101" // /* MW 2 */
+ 10661 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10671 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 192 39 first
+ 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10673 "00100001" // /* MW 3 */
+ 10674 "00000100" // /* MW 2 */
+ 10675 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */
+ 10677 "00000001" // /* MW 5 */
+ 10678 "00000000" // /* MW 4 */
+ 10679 "11111000" // /* MW 3 */
+ 10680 "00010100" // /* MW 2 */
+ 10681 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10683 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10685 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10687 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10691 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10693 "10000001" // /* MW 5 */
+ 10694 "10100000" // /* MW 4 */
+ 10695 "00111001" // /* MW 3 */
+ 10696 "01000100" // /* MW 2 */
+ 10697 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10699 "00010100" // /* MW 3 */
+ 10700 "11001111" // /* MW 2 */
+ 10701 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10703 "01111101" // /* MW 3 */
+ 10704 "11001110" // /* MW 2 */
+ 10705 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10707 "00011101" // /* MW 3 */
+ 10708 "11100011" // /* MW 2 */
+ 10709 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10711 "00111010" // /* MW 3 */
+ 10712 "10110111" // /* MW 2 */
+ 10713 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10715 "11110000" // /* MW 3 */
+ 10716 "11001110" // /* MW 2 */
+ 10717 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10719 "11110000" // /* MW 3 */
+ 10720 "11000110" // /* MW 2 */
+ 10721 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10723 "00010101" // /* MW 3 */
+ 10724 "11000101" // /* MW 2 */
+ 10725 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10727 "00100010" // /* MW 9 */
+ 10728 "11000110" // /* MW 8 */
+ 10729 "00000000" // /* MW 7 */
+ 10730 "00000000" // /* MW 6 */
+ 10731 "01011011" // /* MW 5 */
+ 10732 "00000001" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */
+ 10737 "00000000" // /* MW 5 */
+ 10738 "00000000" // /* MW 4 */
+ 10739 "00010000" // /* MW 3 */
+ 10740 "00010101" // /* MW 2 */
+ 10741 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 194 29 first
+.delay_slot
+ 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10743 "01100100" // /* MW 3 */
+ 10744 "11100010" // /* MW 2 */
+ 10745 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10747 "00000001" // /* MW 3 */
+ 10748 "00000100" // /* MW 2 */
+ 10749 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10755 "00011100" // /* MW 13 */
+ 10756 "00000000" // /* MW 12 */
+ 10757 "00000000" // /* MW 11 */
+ 10758 "01010111" // /* MW 10 */
+ 10759 "00011010" // /* MW 9 */
+ 10760 "01000000" // /* MW 8 */
+ 10761 "00000000" // /* MW 7 */
+ 10762 "00000000" // /* MW 6 */
+ 10763 "10110110" // /* MW 5 */
+ 10764 "00000010" // /* MW 4 */
+ 10765 "11110000" // /* MW 3 */
+ 10766 "00101100" // /* MW 2 */
+ 10767 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+ 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10769 "00000000" // /* MW 15 */
+ 10770 "00000000" // /* MW 14 */
+ 10771 "01111000" // /* MW 13 */
+ 10772 "10100101" // /* MW 12 */
+ 10773 "00000001" // /* MW 11 */
+ 10774 "00001000" // /* MW 10 */
+ 10775 "00000000" // /* MW 9 */
+ 10776 "00000001" // /* MW 8 */
+ 10777 "01011011" // /* MW 7 */
+ 10778 "00000001" // /* MW 6 */
+ 10779 "00100000" // /* MW 5 */
+ 10780 "00000000" // /* MW 4 */
+ 10781 "11110000" // /* MW 3 */
+ 10782 "00101100" // /* MW 2 */
+ 10783 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+.src_ref 10 "softfloat.c" 202 18 first
+.src_ref 10 "softfloat.c" 202 36
+.src_ref 10 "softfloat.c" 203 30 first
+ 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10785 "10101000" // /* MW 9 */
+ 10786 "11001010" // /* MW 8 */
+ 10787 "10101000" // /* MW 7 */
+ 10788 "00110100" // /* MW 6 */
+ 10789 "00110000" // /* MW 5 */
+ 10790 "00100010" // /* MW 4 */
+ 10791 "00000000" // /* MW 3 */
+ 10792 "00100000" // /* MW 2 */
+ 10793 "11111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59
+.src_ref 10 "softfloat.c" 203 12
+.src_ref 10 "softfloat.c" 203 46
+ 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10795 "01011000" // /* MW 9 */
+ 10796 "11111111" // /* MW 8 */
+ 10797 "10001111" // /* MW 7 */
+ 10798 "00101100" // /* MW 6 */
+ 10799 "01100010" // /* MW 5 */
+ 10800 "00000110" // /* MW 4 */
+ 10801 "00000000" // /* MW 3 */
+ 10802 "11100011" // /* MW 2 */
+ 10803 "00000010" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 46
+ 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10805 "11010000" // /* MW 3 */
+ 10806 "10001100" // /* MW 2 */
+ 10807 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 202 36
+ 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00001101" // /* MW 3 */
+ 10810 "01000000" // /* MW 2 */
+ 10811 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 12
+ 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10813 "01000110" // /* MW 3 */
+ 10814 "10001000" // /* MW 2 */
+ 10815 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 205 4 first
+ 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10817 "00000000" // /* MW 3 */
+ 10818 "00101000" // /* MW 2 */
+ 10819 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 9 first
+.delay_slot
+ 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10821 "00000100" // /* MW 3 */
+ 10822 "00110110" // /* MW 2 */
+ 10823 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4 first
+.src_ref 10 "softfloat.c" 204 14 first
+.delay_slot
+ 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10825 "00100010" // /* MW 3 */
+ 10826 "00000100" // /* MW 2 */
+ 10827 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59 first
+.delay_slot
+ 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10829 "00111101" // /* MW 3 */
+ 10830 "10000100" // /* MW 2 */
+ 10831 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10833 "00100000" // /* MW 3 */
+ 10834 "01000100" // /* MW 2 */
+ 10835 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66
+.delay_slot
+ 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10837 "10000001" // /* MW 11 */
+ 10838 "10101101" // /* MW 10 */
+ 10839 "00000000" // /* MW 9 */
+ 10840 "00000100" // /* MW 8 */
+ 10841 "00000001" // /* MW 7 */
+ 10842 "00110110" // /* MW 6 */
+ 10843 "00100000" // /* MW 5 */
+ 10844 "00000000" // /* MW 4 */
+ 10845 "11110000" // /* MW 3 */
+ 10846 "00101100" // /* MW 2 */
+ 10847 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+.src_ref 10 "softfloat.c" 185 12 first
+ 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10849 "00000000" // /* MW 3 */
+ 10850 "00101000" // /* MW 2 */
+ 10851 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10853 "00000000" // /* MW 5 */
+ 10854 "00100000" // /* MW 4 */
+ 10855 "00000001" // /* MW 3 */
+ 10856 "10000000" // /* MW 2 */
+ 10857 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10859 "00100000" // /* MW 3 */
+ 10860 "01000110" // /* MW 2 */
+ 10861 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 68 first
+.delay_slot
+ 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10863 "11010000" // /* MW 3 */
+ 10864 "01000100" // /* MW 2 */
+ 10865 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 49
+.delay_slot
+ 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10867 "00100001" // /* MW 3 */
+ 10868 "11000000" // /* MW 2 */
+ 10869 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19roundAndPackFloat32iij__end
+ 10871 "00000000" // /* MW 1 */
+.label _ZL28normalizeRoundAndPackFloat32iij
+.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 218 first
+.src_ref 10 "softfloat.c" 224 11 first
+.tail_call
+.function_start
+ 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10881 "00000000" // /* MW 5 */
+ 10882 "00000000" // /* MW 4 */
+ 10883 "01111000" // /* MW 3 */
+ 10884 "00010100" // /* MW 2 */
+ 10885 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 552 53 first
+.delay_slot
+ 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10887 "00110000" // /* MW 3 */
+ 10888 "11100000" // /* MW 2 */
+ 10889 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 223 45 first
+.delay_slot
+ 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10891 "11111111" // /* MW 3 */
+ 10892 "00100001" // /* MW 2 */
+ 10893 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 44 first
+.delay_slot
+ 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10895 "00000001" // /* MW 3 */
+ 10896 "10000101" // /* MW 2 */
+ 10897 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 62
+.delay_slot
+ 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10899 "00001101" // /* MW 3 */
+ 10900 "11000111" // /* MW 2 */
+ 10901 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+ 10903 "00000000" // /* MW 1 */
+.label int32_to_float32
+.function int32_to_float32 int32_to_float32
+.src_ref 10 "softfloat.c" 477 first
+.src_ref 10 "softfloat.c" 481 4
+.src_ref 10 "softfloat.c" 481 11 first
+.function_start
+ 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */
+ 10913 "00000001" // /* MW 5 */
+ 10914 "00000000" // /* MW 4 */
+ 10915 "01111000" // /* MW 3 */
+ 10916 "00010101" // /* MW 2 */
+ 10917 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10919 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10927 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11
+ 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10929 "00000000" // /* MW 5 */
+ 10930 "00100000" // /* MW 4 */
+ 10931 "00001000" // /* MW 3 */
+ 10932 "00000000" // /* MW 2 */
+ 10933 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11 first
+ 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10935 "00000111" // /* MW 3 */
+ 10936 "01100001" // /* MW 2 */
+ 10937 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 4
+ 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */
+ 10939 "00000001" // /* MW 5 */
+ 10940 "01000000" // /* MW 4 */
+ 10941 "10000000" // /* MW 3 */
+ 10942 "00010101" // /* MW 2 */
+ 10943 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10953 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 11
+.src_ref 10 "softfloat.c" 484 11 first
+.tail_call
+ 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 10955 "00100000" // /* MW 9 */
+ 10956 "00000000" // /* MW 8 */
+ 10957 "00000000" // /* MW 7 */
+ 10958 "01010000" // /* MW 6 */
+ 10959 "00000101" // /* MW 5 */
+ 10960 "00000000" // /* MW 4 */
+ 10961 "00000000" // /* MW 3 */
+ 10962 "10000010" // /* MW 2 */
+ 10963 "00010011" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 60
+.src_ref 10 "softfloat.c" 484 62
+.delay_slot
+ 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00010000" // /* MW 3 */
+ 10966 "01000111" // /* MW 2 */
+ 10967 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16
+.delay_slot
+ 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10969 "00000001" // /* MW 3 */
+ 10970 "00100000" // /* MW 2 */
+ 10971 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16 first
+.delay_slot
+ 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10973 "00001010" // /* MW 3 */
+ 10974 "01000011" // /* MW 2 */
+ 10975 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10979 "00011100" // /* MW 13 */
+ 10980 "00000000" // /* MW 12 */
+ 10981 "00000000" // /* MW 11 */
+ 10982 "01010111" // /* MW 10 */
+ 10983 "00011010" // /* MW 9 */
+ 10984 "01000000" // /* MW 8 */
+ 10985 "00000000" // /* MW 7 */
+ 10986 "00000000" // /* MW 6 */
+ 10987 "10110110" // /* MW 5 */
+ 10988 "00000010" // /* MW 4 */
+ 10989 "11110000" // /* MW 3 */
+ 10990 "00101100" // /* MW 2 */
+ 10991 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_80
+.src_ref 10 "softfloat.c" 481 18 first
+.return_address
+ 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10993 "00000000" // /* MW 3 */
+ 10994 "00101000" // /* MW 2 */
+ 10995 "00010000" // /* MW 1 */
+.delay_slot
+ 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10997 "00000001" // /* MW 3 */
+ 10998 "00000000" // /* MW 2 */
+ 10999 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11001 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11003 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11007 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_96
+.src_ref 10 "softfloat.c" 482 37 first
+ 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000000" // /* MW 3 */
+ 11010 "00101000" // /* MW 2 */
+ 11011 "00010000" // /* MW 1 */
+.delay_slot
+ 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11013 "00000000" // /* MW 5 */
+ 11014 "00100000" // /* MW 4 */
+ 11015 "00000000" // /* MW 3 */
+ 11016 "00000000" // /* MW 2 */
+ 11017 "11001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11019 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label int32_to_float32__end
+ 11025 "00000000" // /* MW 1 */
+.label _ZL14addFloat32Sigsjji
+.function addFloat32Sigs _ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 734 first
+.function_start
+ 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11041 "10010000" // /* MW 9 */
+ 11042 "11111111" // /* MW 8 */
+ 11043 "00001111" // /* MW 7 */
+ 11044 "11111110" // /* MW 6 */
+ 11045 "00011111" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00110010" // /* MW 2 */
+ 11049 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00101101" // /* MW 3 */
+ 11052 "01100011" // /* MW 2 */
+ 11053 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11055 "00101101" // /* MW 3 */
+ 11056 "10001001" // /* MW 2 */
+ 11057 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11059 "10010000" // /* MW 3 */
+ 11060 "01110110" // /* MW 2 */
+ 11061 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "10010000" // /* MW 3 */
+ 11064 "00110010" // /* MW 2 */
+ 11065 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 744 19 first
+.src_ref 10 "softfloat.c" 747 11
+.src_ref 10 "softfloat.c" 761 22
+.src_ref 10 "softfloat.c" 772 35
+.src_ref 10 "softfloat.c" 788 24
+ 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11067 "00000001" // /* MW 5 */
+ 11068 "00100000" // /* MW 4 */
+ 11069 "00111100" // /* MW 3 */
+ 11070 "01110010" // /* MW 2 */
+ 11071 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 11 first
+ 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00011010" // /* MW 3 */
+ 11074 "00001001" // /* MW 2 */
+ 11075 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 4
+ 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */
+ 11077 "00000001" // /* MW 5 */
+ 11078 "01000000" // /* MW 4 */
+ 11079 "11111000" // /* MW 3 */
+ 11080 "00010101" // /* MW 2 */
+ 11081 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.delay_slot
+ 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00000100" // /* MW 3 */
+ 11084 "01100111" // /* MW 2 */
+ 11085 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 745 9
+.src_ref 10 "softfloat.c" 746 9
+.delay_slot
+ 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11087 "00011001" // /* MW 5 */
+ 11088 "00100000" // /* MW 4 */
+ 11089 "10010000" // /* MW 3 */
+ 11090 "00100000" // /* MW 2 */
+ 11091 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 745 9 first
+.delay_slot
+ 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "00001101" // /* MW 3 */
+ 11094 "11100110" // /* MW 2 */
+ 11095 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 746 9 first
+.src_ref 10 "softfloat.c" 748 18
+.src_ref 10 "softfloat.c" 762 18
+.delay_slot
+ 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11097 "11111101" // /* MW 5 */
+ 11098 "00100011" // /* MW 4 */
+ 11099 "10111010" // /* MW 3 */
+ 11100 "00000001" // /* MW 2 */
+ 11101 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.src_ref 10 "softfloat.c" 748 18 first
+.delay_slot
+ 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11103 "01111101" // /* MW 5 */
+ 11104 "00100000" // /* MW 4 */
+ 11105 "11111001" // /* MW 3 */
+ 11106 "00101000" // /* MW 2 */
+ 11107 "11011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 22 first
+ 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11109 "10001001" // /* MW 3 */
+ 11110 "01001011" // /* MW 2 */
+ 11111 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 9
+ 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */
+ 11113 "00000001" // /* MW 5 */
+ 11114 "01000000" // /* MW 4 */
+ 11115 "01011000" // /* MW 3 */
+ 11116 "00010110" // /* MW 2 */
+ 11117 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "00101101" // /* MW 3 */
+ 11120 "11001001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11129 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 18 first
+ 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11131 "01000111" // /* MW 3 */
+ 11132 "01101001" // /* MW 2 */
+ 11133 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 8
+ 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */
+ 11135 "00000001" // /* MW 5 */
+ 11136 "01000000" // /* MW 4 */
+ 11137 "01000000" // /* MW 3 */
+ 11138 "00010110" // /* MW 2 */
+ 11139 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10100000" // /* MW 3 */
+ 11152 "01010001" // /* MW 2 */
+ 11153 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 787 4
+ 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11155 "10100000" // /* MW 3 */
+ 11156 "10011100" // /* MW 2 */
+ 11157 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 767 12 first
+ 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11159 "00000111" // /* MW 3 */
+ 11160 "01000000" // /* MW 2 */
+ 11161 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+ 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11163 "00010010" // /* MW 3 */
+ 11164 "00100011" // /* MW 2 */
+ 11165 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 772 35 first
+ 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11167 "00010001" // /* MW 3 */
+ 11168 "00100011" // /* MW 2 */
+ 11169 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11171 "00000001" // /* MW 5 */
+ 11172 "00000000" // /* MW 4 */
+ 11173 "00101000" // /* MW 3 */
+ 11174 "00010110" // /* MW 2 */
+ 11175 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11177 "00000000" // /* MW 5 */
+ 11178 "00100000" // /* MW 4 */
+ 11179 "00001010" // /* MW 3 */
+ 11180 "00000000" // /* MW 2 */
+ 11181 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17 first
+.delay_slot
+ 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11183 "01000101" // /* MW 3 */
+ 11184 "11000111" // /* MW 2 */
+ 11185 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+.delay_slot
+ 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11187 "00110010" // /* MW 3 */
+ 11188 "11100110" // /* MW 2 */
+ 11189 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11193 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11195 "10000001" // /* MW 5 */
+ 11196 "00100000" // /* MW 4 */
+ 11197 "00110000" // /* MW 3 */
+ 11198 "11100010" // /* MW 2 */
+ 11199 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11201 "00100100" // /* MW 3 */
+ 11202 "11100101" // /* MW 2 */
+ 11203 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11205 "00101101" // /* MW 3 */
+ 11206 "11100101" // /* MW 2 */
+ 11207 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11209 "00001010" // /* MW 3 */
+ 11210 "01110110" // /* MW 2 */
+ 11211 "00010100" // /* MW 1 */
+ 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */
+ 11213 "00000000" // /* MW 5 */
+ 11214 "00000000" // /* MW 4 */
+ 11215 "00101000" // /* MW 3 */
+ 11216 "00010110" // /* MW 2 */
+ 11217 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+.delay_slot
+ 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11219 "00111101" // /* MW 3 */
+ 11220 "11000110" // /* MW 2 */
+ 11221 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+.delay_slot
+ 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11223 "11110000" // /* MW 3 */
+ 11224 "10100100" // /* MW 2 */
+ 11225 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+.delay_slot
+ 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11227 "11110000" // /* MW 3 */
+ 11228 "11100010" // /* MW 2 */
+ 11229 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+.delay_slot
+ 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11231 "00100101" // /* MW 3 */
+ 11232 "11100101" // /* MW 2 */
+ 11233 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+.delay_slot
+ 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11235 "01100000" // /* MW 13 */
+ 11236 "00101011" // /* MW 12 */
+ 11237 "00000000" // /* MW 11 */
+ 11238 "10101111" // /* MW 10 */
+ 11239 "00110100" // /* MW 9 */
+ 11240 "00000000" // /* MW 8 */
+ 11241 "00100010" // /* MW 7 */
+ 11242 "01100111" // /* MW 6 */
+ 11243 "00100100" // /* MW 5 */
+ 11244 "00000000" // /* MW 4 */
+ 11245 "11110000" // /* MW 3 */
+ 11246 "00101100" // /* MW 2 */
+ 11247 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_208
+.src_ref 10 "softfloat.c" 748 8 first
+ 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */
+ 11249 "00000001" // /* MW 5 */
+ 11250 "01000000" // /* MW 4 */
+ 11251 "01111000" // /* MW 3 */
+ 11252 "00010110" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11255 "00000000" // /* MW 5 */
+ 11256 "00100000" // /* MW 4 */
+ 11257 "00001010" // /* MW 3 */
+ 11258 "00000000" // /* MW 2 */
+ 11259 "00100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11263 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11265 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11267 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11269 "10100000" // /* MW 3 */
+ 11270 "01010001" // /* MW 2 */
+ 11271 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 753 12 first
+.src_ref 10 "softfloat.c" 787 4
+ 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11273 "01000001" // /* MW 5 */
+ 11274 "00111011" // /* MW 4 */
+ 11275 "11100001" // /* MW 3 */
+ 11276 "11111111" // /* MW 2 */
+ 11277 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8
+.src_ref 10 "softfloat.c" 752 18
+ 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "10100000" // /* MW 3 */
+ 11280 "11011100" // /* MW 2 */
+ 11281 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+ 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010010" // /* MW 3 */
+ 11284 "11100011" // /* MW 2 */
+ 11285 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11287 "00000001" // /* MW 5 */
+ 11288 "00000000" // /* MW 4 */
+ 11289 "00101000" // /* MW 3 */
+ 11290 "00010110" // /* MW 2 */
+ 11291 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17 first
+.delay_slot
+ 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11293 "00000101" // /* MW 3 */
+ 11294 "00000001" // /* MW 2 */
+ 11295 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+.delay_slot
+ 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00000010" // /* MW 3 */
+ 11298 "00100000" // /* MW 2 */
+ 11299 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11301 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11303 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11305 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11307 "10000001" // /* MW 5 */
+ 11308 "00100000" // /* MW 4 */
+ 11309 "00110000" // /* MW 3 */
+ 11310 "11100010" // /* MW 2 */
+ 11311 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11313 "00100100" // /* MW 3 */
+ 11314 "11100101" // /* MW 2 */
+ 11315 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11317 "00101101" // /* MW 3 */
+ 11318 "00100101" // /* MW 2 */
+ 11319 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11321 "00111101" // /* MW 3 */
+ 11322 "00000110" // /* MW 2 */
+ 11323 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11325 "00001010" // /* MW 3 */
+ 11326 "01110110" // /* MW 2 */
+ 11327 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11329 "11110000" // /* MW 3 */
+ 11330 "10100100" // /* MW 2 */
+ 11331 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11333 "11110000" // /* MW 3 */
+ 11334 "00100000" // /* MW 2 */
+ 11335 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11337 "00100101" // /* MW 3 */
+ 11338 "11100011" // /* MW 2 */
+ 11339 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11341 "00010010" // /* MW 3 */
+ 11342 "00100001" // /* MW 2 */
+ 11343 "00010100" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_304
+.src_ref 10 "softfloat.c" 785 9 first
+.src_ref 10 "softfloat.c" 786 26
+.src_ref 10 "softfloat.c" 787 4 first
+ 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11345 "11001000" // /* MW 9 */
+ 11346 "10111111" // /* MW 8 */
+ 11347 "00101000" // /* MW 7 */
+ 11348 "00101110" // /* MW 6 */
+ 11349 "00111010" // /* MW 5 */
+ 11350 "00100111" // /* MW 4 */
+ 11351 "00000000" // /* MW 3 */
+ 11352 "00110010" // /* MW 2 */
+ 11353 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 18 first
+.src_ref 10 "softfloat.c" 790 8 first
+ 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11355 "00000001" // /* MW 5 */
+ 11356 "00110001" // /* MW 4 */
+ 11357 "00011000" // /* MW 3 */
+ 11358 "11100000" // /* MW 2 */
+ 11359 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 26
+ 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11361 "00101101" // /* MW 3 */
+ 11362 "11100101" // /* MW 2 */
+ 11363 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 24 first
+ 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11365 "10001010" // /* MW 3 */
+ 11366 "10110111" // /* MW 2 */
+ 11367 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11369 "00000010" // /* MW 3 */
+ 11370 "01000101" // /* MW 2 */
+ 11371 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11373 "00110010" // /* MW 3 */
+ 11374 "10000111" // /* MW 2 */
+ 11375 "00010100" // /* MW 1 */
+.label __ll1__ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 793 11 first
+.tail_call
+ 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 11377 "00000000" // /* MW 5 */
+ 11378 "00000000" // /* MW 4 */
+ 11379 "01111000" // /* MW 3 */
+ 11380 "00010100" // /* MW 2 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11391 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.src_ref 10 "softfloat.c" 763 12 first
+.return_address
+ 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */
+ 11393 "00000001" // /* MW 5 */
+ 11394 "01000000" // /* MW 4 */
+ 11395 "10001000" // /* MW 3 */
+ 11396 "00010110" // /* MW 2 */
+ 11397 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11407 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 764 12 first
+ 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11409 "00000000" // /* MW 3 */
+ 11410 "00101000" // /* MW 2 */
+ 11411 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11413 "00000000" // /* MW 5 */
+ 11414 "00100000" // /* MW 4 */
+ 11415 "00001000" // /* MW 3 */
+ 11416 "10000000" // /* MW 2 */
+ 11417 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11419 "00000000" // /* MW 3 */
+ 11420 "00000001" // /* MW 2 */
+ 11421 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11427 "00011100" // /* MW 13 */
+ 11428 "00000000" // /* MW 12 */
+ 11429 "00000000" // /* MW 11 */
+ 11430 "01010111" // /* MW 10 */
+ 11431 "00011010" // /* MW 9 */
+ 11432 "01000000" // /* MW 8 */
+ 11433 "00000000" // /* MW 7 */
+ 11434 "00000000" // /* MW 6 */
+ 11435 "10110110" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "11110000" // /* MW 3 */
+ 11438 "00101100" // /* MW 2 */
+ 11439 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 776 8 first
+ 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */
+ 11441 "00000001" // /* MW 5 */
+ 11442 "01000000" // /* MW 4 */
+ 11443 "10010000" // /* MW 3 */
+ 11444 "00010110" // /* MW 2 */
+ 11445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11455 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 8 first
+ 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */
+ 11457 "00000001" // /* MW 5 */
+ 11458 "00000000" // /* MW 4 */
+ 11459 "10101000" // /* MW 3 */
+ 11460 "00010110" // /* MW 2 */
+ 11461 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11471 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11473 "10100000" // /* MW 3 */
+ 11474 "01010001" // /* MW 2 */
+ 11475 "00011000" // /* MW 1 */
+ 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */
+ 11477 "00000000" // /* MW 5 */
+ 11478 "00000000" // /* MW 4 */
+ 11479 "00111000" // /* MW 3 */
+ 11480 "00010110" // /* MW 2 */
+ 11481 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26
+.delay_slot
+ 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11483 "00000000" // /* MW 5 */
+ 11484 "10100000" // /* MW 4 */
+ 11485 "00001000" // /* MW 3 */
+ 11486 "00000000" // /* MW 2 */
+ 11487 "01000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26 first
+.src_ref 10 "softfloat.c" 793 11
+.delay_slot
+ 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "01000001" // /* MW 5 */
+ 11490 "00111011" // /* MW 4 */
+ 11491 "00010001" // /* MW 3 */
+ 11492 "01100010" // /* MW 2 */
+ 11493 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 33
+.delay_slot
+ 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11495 "00000000" // /* MW 3 */
+ 11496 "01000111" // /* MW 2 */
+ 11497 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11501 "01100111" // /* MW 3 */
+ 11502 "00000001" // /* MW 2 */
+ 11503 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_464
+.src_ref 10 "softfloat.c" 749 12 first
+ 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */
+ 11505 "00000001" // /* MW 5 */
+ 11506 "01000000" // /* MW 4 */
+ 11507 "10111000" // /* MW 3 */
+ 11508 "00010110" // /* MW 2 */
+ 11509 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11519 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 750 12 first
+ 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11521 "00000000" // /* MW 3 */
+ 11522 "00101000" // /* MW 2 */
+ 11523 "00010000" // /* MW 1 */
+.delay_slot
+ 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "10100000" // /* MW 3 */
+ 11526 "00010000" // /* MW 2 */
+ 11527 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11535 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.src_ref 10 "softfloat.c" 763 31 first
+.tail_call
+ 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11537 "00000000" // /* MW 5 */
+ 11538 "00000000" // /* MW 4 */
+ 11539 "01000000" // /* MW 3 */
+ 11540 "00010100" // /* MW 2 */
+ 11541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11551 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 777 22 first
+.return_address
+ 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11553 "00000101" // /* MW 3 */
+ 11554 "11100001" // /* MW 2 */
+ 11555 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 777 12
+ 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */
+ 11557 "00000001" // /* MW 5 */
+ 11558 "01000000" // /* MW 4 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "00010110" // /* MW 2 */
+ 11561 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11567 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11571 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 778 12 first
+ 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11573 "00000000" // /* MW 3 */
+ 11574 "00101000" // /* MW 2 */
+ 11575 "00010000" // /* MW 1 */
+.delay_slot
+ 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11577 "10100000" // /* MW 3 */
+ 11578 "00010000" // /* MW 2 */
+ 11579 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11583 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11585 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11587 "00011100" // /* MW 13 */
+ 11588 "00000000" // /* MW 12 */
+ 11589 "00000000" // /* MW 11 */
+ 11590 "01010111" // /* MW 10 */
+ 11591 "00011010" // /* MW 9 */
+ 11592 "01000000" // /* MW 8 */
+ 11593 "00000000" // /* MW 7 */
+ 11594 "00000000" // /* MW 6 */
+ 11595 "10110110" // /* MW 5 */
+ 11596 "00000010" // /* MW 4 */
+ 11597 "11110000" // /* MW 3 */
+ 11598 "00101100" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 780 25 first
+.src_ref 10 "softfloat.c" 780 62 first
+ 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11601 "10000010" // /* MW 5 */
+ 11602 "00110011" // /* MW 4 */
+ 11603 "00001000" // /* MW 3 */
+ 11604 "00000000" // /* MW 2 */
+ 11605 "00000101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11607 "11101001" // /* MW 3 */
+ 11608 "11100010" // /* MW 2 */
+ 11609 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11611 "00011101" // /* MW 3 */
+ 11612 "00100001" // /* MW 2 */
+ 11613 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66 first
+.delay_slot
+ 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11615 "00000000" // /* MW 3 */
+ 11616 "00000001" // /* MW 2 */
+ 11617 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 11621 "10000001" // /* MW 11 */
+ 11622 "10101101" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "00000000" // /* MW 7 */
+ 11626 "00000000" // /* MW 6 */
+ 11627 "00100000" // /* MW 5 */
+ 11628 "00000000" // /* MW 4 */
+ 11629 "11110000" // /* MW 3 */
+ 11630 "00101100" // /* MW 2 */
+ 11631 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 749 31 first
+.tail_call
+ 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11633 "00000000" // /* MW 5 */
+ 11634 "00000000" // /* MW 4 */
+ 11635 "01000000" // /* MW 3 */
+ 11636 "00010100" // /* MW 2 */
+ 11637 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11639 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.src_ref 10 "softfloat.c" 777 38 first
+.tail_call
+.return_address
+ 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11649 "00000000" // /* MW 5 */
+ 11650 "00000000" // /* MW 4 */
+ 11651 "01000000" // /* MW 3 */
+ 11652 "00010100" // /* MW 2 */
+ 11653 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14addFloat32Sigsjji__end
+ 11663 "00000000" // /* MW 1 */
+.label _ZL14subFloat32Sigsjji
+.function subFloat32Sigs _ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 805 first
+.function_start
+ 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11665 "10010000" // /* MW 9 */
+ 11666 "11111111" // /* MW 8 */
+ 11667 "00001111" // /* MW 7 */
+ 11668 "11111110" // /* MW 6 */
+ 11669 "00011111" // /* MW 5 */
+ 11670 "00000000" // /* MW 4 */
+ 11671 "00000000" // /* MW 3 */
+ 11672 "00110001" // /* MW 2 */
+ 11673 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11675 "00011101" // /* MW 3 */
+ 11676 "10001001" // /* MW 2 */
+ 11677 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11679 "00011101" // /* MW 3 */
+ 11680 "01100101" // /* MW 2 */
+ 11681 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+ 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11683 "00000100" // /* MW 3 */
+ 11684 "01101001" // /* MW 2 */
+ 11685 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21 first
+ 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11687 "10010000" // /* MW 3 */
+ 11688 "00110010" // /* MW 2 */
+ 11689 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11691 "10010000" // /* MW 3 */
+ 11692 "10110110" // /* MW 2 */
+ 11693 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.src_ref 10 "softfloat.c" 816 9
+.src_ref 10 "softfloat.c" 817 9
+ 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11695 "00011101" // /* MW 5 */
+ 11696 "10100000" // /* MW 4 */
+ 11697 "10011001" // /* MW 3 */
+ 11698 "00100000" // /* MW 2 */
+ 11699 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 816 9 first
+ 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11701 "00111101" // /* MW 3 */
+ 11702 "00100011" // /* MW 2 */
+ 11703 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 815 19 first
+.src_ref 10 "softfloat.c" 818 11
+.src_ref 10 "softfloat.c" 819 17
+.src_ref 10 "softfloat.c" 843 31
+ 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00100000" // /* MW 4 */
+ 11707 "00111100" // /* MW 3 */
+ 11708 "10110010" // /* MW 2 */
+ 11709 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 11 first
+ 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00101010" // /* MW 3 */
+ 11712 "00001011" // /* MW 2 */
+ 11713 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 4
+ 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */
+ 11715 "00000001" // /* MW 5 */
+ 11716 "01000000" // /* MW 4 */
+ 11717 "01000000" // /* MW 3 */
+ 11718 "00010111" // /* MW 2 */
+ 11719 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 817 9 first
+.delay_slot
+ 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11721 "00111101" // /* MW 3 */
+ 11722 "00100001" // /* MW 2 */
+ 11723 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14
+.src_ref 10 "softfloat.c" 851 14
+.src_ref 10 "softfloat.c" 859 13
+.src_ref 10 "softfloat.c" 862 9
+.delay_slot
+ 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11725 "00010000" // /* MW 9 */
+ 11726 "00000000" // /* MW 8 */
+ 11727 "10001000" // /* MW 7 */
+ 11728 "00000000" // /* MW 6 */
+ 11729 "00000000" // /* MW 5 */
+ 11730 "00010000" // /* MW 4 */
+ 11731 "00000000" // /* MW 3 */
+ 11732 "11100000" // /* MW 2 */
+ 11733 "00011111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 851 14 first
+.delay_slot
+ 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000111" // /* MW 3 */
+ 11736 "11101000" // /* MW 2 */
+ 11737 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 862 9 first
+.delay_slot
+ 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11739 "01000101" // /* MW 3 */
+ 11740 "01100110" // /* MW 2 */
+ 11741 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 859 13 first
+.delay_slot
+ 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11743 "00000101" // /* MW 3 */
+ 11744 "00001001" // /* MW 2 */
+ 11745 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 17 first
+ 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11747 "10001001" // /* MW 3 */
+ 11748 "10001101" // /* MW 2 */
+ 11749 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 4
+ 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */
+ 11751 "00000001" // /* MW 5 */
+ 11752 "01000000" // /* MW 4 */
+ 11753 "10010000" // /* MW 3 */
+ 11754 "00010111" // /* MW 2 */
+ 11755 "00110000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.src_ref 10 "softfloat.c" 835 34
+.delay_slot
+ 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11757 "00000101" // /* MW 3 */
+ 11758 "00001010" // /* MW 2 */
+ 11759 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 34 first
+.delay_slot
+ 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11761 "01010110" // /* MW 3 */
+ 11762 "11001110" // /* MW 2 */
+ 11763 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14 first
+ 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "00000111" // /* MW 3 */
+ 11772 "01101000" // /* MW 2 */
+ 11773 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 4
+ 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */
+ 11775 "00000001" // /* MW 5 */
+ 11776 "01000000" // /* MW 4 */
+ 11777 "11001000" // /* MW 3 */
+ 11778 "00010111" // /* MW 2 */
+ 11779 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11781 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11791 "10100000" // /* MW 3 */
+ 11792 "01010011" // /* MW 2 */
+ 11793 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 838 8 first
+ 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11795 "00000111" // /* MW 3 */
+ 11796 "10100000" // /* MW 2 */
+ 11797 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+ 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11799 "00100010" // /* MW 3 */
+ 11800 "00100001" // /* MW 2 */
+ 11801 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 843 31 first
+ 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11803 "00000001" // /* MW 3 */
+ 11804 "00100001" // /* MW 2 */
+ 11805 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */
+ 11807 "00000001" // /* MW 5 */
+ 11808 "00000000" // /* MW 4 */
+ 11809 "00110000" // /* MW 3 */
+ 11810 "00010111" // /* MW 2 */
+ 11811 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+.delay_slot
+ 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00110010" // /* MW 3 */
+ 11814 "01100011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11817 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11819 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11821 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11823 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11825 "01011000" // /* MW 9 */
+ 11826 "00011111" // /* MW 8 */
+ 11827 "01001000" // /* MW 7 */
+ 11828 "00001110" // /* MW 6 */
+ 11829 "00111000" // /* MW 5 */
+ 11830 "00110000" // /* MW 4 */
+ 11831 "00000000" // /* MW 3 */
+ 11832 "00010100" // /* MW 2 */
+ 11833 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11835 "00100100" // /* MW 3 */
+ 11836 "11100101" // /* MW 2 */
+ 11837 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11839 "00101101" // /* MW 3 */
+ 11840 "01100101" // /* MW 2 */
+ 11841 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11843 "11110000" // /* MW 3 */
+ 11844 "01100110" // /* MW 2 */
+ 11845 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01001010" // /* MW 3 */
+ 11848 "00110111" // /* MW 2 */
+ 11849 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+ 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00111101" // /* MW 3 */
+ 11852 "01100010" // /* MW 2 */
+ 11853 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+ 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11110000" // /* MW 3 */
+ 11856 "10100100" // /* MW 2 */
+ 11857 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25
+ 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11859 "00100101" // /* MW 3 */
+ 11860 "01100001" // /* MW 2 */
+ 11861 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11863 "00000010" // /* MW 9 */
+ 11864 "11100011" // /* MW 8 */
+ 11865 "00000100" // /* MW 7 */
+ 11866 "00000000" // /* MW 6 */
+ 11867 "01011011" // /* MW 5 */
+ 11868 "00000001" // /* MW 4 */
+ 11869 "11110000" // /* MW 3 */
+ 11870 "00101100" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */
+ 11873 "00000000" // /* MW 5 */
+ 11874 "00000000" // /* MW 4 */
+ 11875 "10000000" // /* MW 3 */
+ 11876 "00010111" // /* MW 2 */
+ 11877 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16 first
+.delay_slot
+ 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11879 "00010001" // /* MW 3 */
+ 11880 "00000111" // /* MW 2 */
+ 11881 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11883 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "01111000" // /* MW 13 */
+ 11892 "10100101" // /* MW 12 */
+ 11893 "00000001" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_240
+.src_ref 10 "softfloat.c" 851 4 first
+ 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */
+ 11905 "00000001" // /* MW 5 */
+ 11906 "01000000" // /* MW 4 */
+ 11907 "11100000" // /* MW 3 */
+ 11908 "00010111" // /* MW 2 */
+ 11909 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11911 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11913 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11915 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11917 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "10100000" // /* MW 3 */
+ 11922 "00011101" // /* MW 2 */
+ 11923 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "10100000" // /* MW 3 */
+ 11926 "01010001" // /* MW 2 */
+ 11927 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4
+.src_ref 10 "softfloat.c" 855 14
+ 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "10100000" // /* MW 3 */
+ 11930 "11011100" // /* MW 2 */
+ 11931 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00100000" // /* MW 3 */
+ 11934 "01010000" // /* MW 2 */
+ 11935 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 856 8 first
+ 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11937 "11111111" // /* MW 3 */
+ 11938 "10100011" // /* MW 2 */
+ 11939 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+ 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11941 "00100010" // /* MW 3 */
+ 11942 "01100011" // /* MW 2 */
+ 11943 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */
+ 11945 "00000001" // /* MW 5 */
+ 11946 "00000000" // /* MW 4 */
+ 11947 "01111000" // /* MW 3 */
+ 11948 "00010111" // /* MW 2 */
+ 11949 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+.delay_slot
+ 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11951 "01000010" // /* MW 3 */
+ 11952 "00100000" // /* MW 2 */
+ 11953 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11957 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11959 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11961 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11963 "01011000" // /* MW 9 */
+ 11964 "00011111" // /* MW 8 */
+ 11965 "10001000" // /* MW 7 */
+ 11966 "10001110" // /* MW 6 */
+ 11967 "00101000" // /* MW 5 */
+ 11968 "00110001" // /* MW 4 */
+ 11969 "00000000" // /* MW 3 */
+ 11970 "00000011" // /* MW 2 */
+ 11971 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11973 "01000100" // /* MW 3 */
+ 11974 "10101001" // /* MW 2 */
+ 11975 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11977 "01001101" // /* MW 3 */
+ 11978 "00101001" // /* MW 2 */
+ 11979 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11981 "00101101" // /* MW 3 */
+ 11982 "00100101" // /* MW 2 */
+ 11983 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11985 "00111010" // /* MW 3 */
+ 11986 "01110110" // /* MW 2 */
+ 11987 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "11110000" // /* MW 3 */
+ 11990 "00101000" // /* MW 2 */
+ 11991 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "11110000" // /* MW 3 */
+ 11994 "00100000" // /* MW 2 */
+ 11995 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11997 "01000101" // /* MW 3 */
+ 11998 "10100011" // /* MW 2 */
+ 11999 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12001 "00000000" // /* MW 15 */
+ 12002 "00000000" // /* MW 14 */
+ 12003 "01111000" // /* MW 13 */
+ 12004 "10100101" // /* MW 12 */
+ 12005 "00000001" // /* MW 11 */
+ 12006 "10010000" // /* MW 10 */
+ 12007 "00001000" // /* MW 9 */
+ 12008 "00100001" // /* MW 8 */
+ 12009 "01011011" // /* MW 7 */
+ 12010 "00000001" // /* MW 6 */
+ 12011 "00100000" // /* MW 5 */
+ 12012 "00000000" // /* MW 4 */
+ 12013 "11110000" // /* MW 3 */
+ 12014 "00101100" // /* MW 2 */
+ 12015 "00000000" // /* MW 1 */
+.label __ll1__ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 864 16 first
+ 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12017 "00000000" // /* MW 15 */
+ 12018 "00000000" // /* MW 14 */
+ 12019 "01111000" // /* MW 13 */
+ 12020 "10100101" // /* MW 12 */
+ 12021 "00000001" // /* MW 11 */
+ 12022 "00001100" // /* MW 10 */
+ 12023 "00111000" // /* MW 9 */
+ 12024 "00100110" // /* MW 8 */
+ 12025 "01011011" // /* MW 7 */
+ 12026 "00000001" // /* MW 6 */
+ 12027 "00100000" // /* MW 5 */
+ 12028 "00000000" // /* MW 4 */
+ 12029 "11110000" // /* MW 3 */
+ 12030 "00101100" // /* MW 2 */
+ 12031 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.src_ref 10 "softfloat.c" 868 11 first
+.tail_call
+ 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 12033 "00000000" // /* MW 5 */
+ 12034 "00000000" // /* MW 4 */
+ 12035 "01000000" // /* MW 3 */
+ 12036 "00010101" // /* MW 2 */
+ 12037 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4 first
+.delay_slot
+ 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "11111111" // /* MW 3 */
+ 12040 "01000101" // /* MW 2 */
+ 12041 "00010110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12049 "00000000" // /* MW 15 */
+ 12050 "00000000" // /* MW 14 */
+ 12051 "01111000" // /* MW 13 */
+ 12052 "10100101" // /* MW 12 */
+ 12053 "00000001" // /* MW 11 */
+ 12054 "00000000" // /* MW 10 */
+ 12055 "00000000" // /* MW 9 */
+ 12056 "00000000" // /* MW 8 */
+ 12057 "01011011" // /* MW 7 */
+ 12058 "00000001" // /* MW 6 */
+ 12059 "00100000" // /* MW 5 */
+ 12060 "00000000" // /* MW 4 */
+ 12061 "11110000" // /* MW 3 */
+ 12062 "00101100" // /* MW 2 */
+ 12063 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 820 4 first
+.return_address
+ 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */
+ 12065 "00000001" // /* MW 5 */
+ 12066 "01000000" // /* MW 4 */
+ 12067 "11110000" // /* MW 3 */
+ 12068 "00010111" // /* MW 2 */
+ 12069 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12079 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 14 first
+ 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12081 "00011100" // /* MW 3 */
+ 12082 "00100111" // /* MW 2 */
+ 12083 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 4
+ 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12085 "00000001" // /* MW 5 */
+ 12086 "01000000" // /* MW 4 */
+ 12087 "00001000" // /* MW 3 */
+ 12088 "00011000" // /* MW 2 */
+ 12089 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4 first
+.delay_slot
+ 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12091 "10010010" // /* MW 3 */
+ 12092 "01110001" // /* MW 2 */
+ 12093 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12095 "10100000" // /* MW 3 */
+ 12096 "10011101" // /* MW 2 */
+ 12097 "00011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12099 "00100010" // /* MW 3 */
+ 12100 "01110011" // /* MW 2 */
+ 12101 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12103 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12105 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 14 first
+ 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00001100" // /* MW 3 */
+ 12108 "01100101" // /* MW 2 */
+ 12109 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 4
+ 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */
+ 12111 "00000001" // /* MW 5 */
+ 12112 "01000000" // /* MW 4 */
+ 12113 "00011000" // /* MW 3 */
+ 12114 "00011000" // /* MW 2 */
+ 12115 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12125 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12127 "01111101" // /* MW 3 */
+ 12128 "00100000" // /* MW 2 */
+ 12129 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24
+ 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12131 "11101000" // /* MW 5 */
+ 12132 "11001001" // /* MW 4 */
+ 12133 "11000000" // /* MW 3 */
+ 12134 "00000111" // /* MW 2 */
+ 12135 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24 first
+ 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "01010110" // /* MW 3 */
+ 12138 "00000110" // /* MW 2 */
+ 12139 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12143 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 4
+ 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12145 "00000000" // /* MW 3 */
+ 12146 "00101000" // /* MW 2 */
+ 12147 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12149 "00001101" // /* MW 3 */
+ 12150 "00100010" // /* MW 2 */
+ 12151 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12155 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12157 "00100111" // /* MW 3 */
+ 12158 "01100011" // /* MW 2 */
+ 12159 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12161 "00000000" // /* MW 15 */
+ 12162 "00000000" // /* MW 14 */
+ 12163 "01111000" // /* MW 13 */
+ 12164 "10100101" // /* MW 12 */
+ 12165 "00000001" // /* MW 11 */
+ 12166 "01101100" // /* MW 10 */
+ 12167 "00001000" // /* MW 9 */
+ 12168 "00100010" // /* MW 8 */
+ 12169 "01011011" // /* MW 7 */
+ 12170 "00000001" // /* MW 6 */
+ 12171 "00100000" // /* MW 5 */
+ 12172 "00000000" // /* MW 4 */
+ 12173 "11110000" // /* MW 3 */
+ 12174 "00101100" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 834 8 first
+ 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */
+ 12177 "00000001" // /* MW 5 */
+ 12178 "01000000" // /* MW 4 */
+ 12179 "00101000" // /* MW 3 */
+ 12180 "00011000" // /* MW 2 */
+ 12181 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12193 "01111101" // /* MW 3 */
+ 12194 "00100000" // /* MW 2 */
+ 12195 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 8 first
+ 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12197 "00000000" // /* MW 3 */
+ 12198 "00101000" // /* MW 2 */
+ 12199 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12201 "00001101" // /* MW 3 */
+ 12202 "11100001" // /* MW 2 */
+ 12203 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12205 "00000000" // /* MW 5 */
+ 12206 "10100000" // /* MW 4 */
+ 12207 "00001000" // /* MW 3 */
+ 12208 "10000000" // /* MW 2 */
+ 12209 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12211 "00000000" // /* MW 3 */
+ 12212 "01000001" // /* MW 2 */
+ 12213 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 12217 "00011100" // /* MW 7 */
+ 12218 "00000000" // /* MW 6 */
+ 12219 "00000000" // /* MW 5 */
+ 12220 "00000100" // /* MW 4 */
+ 12221 "11110000" // /* MW 3 */
+ 12222 "00101100" // /* MW 2 */
+ 12223 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 852 8 first
+ 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */
+ 12225 "00000001" // /* MW 5 */
+ 12226 "01000000" // /* MW 4 */
+ 12227 "00110000" // /* MW 3 */
+ 12228 "00011000" // /* MW 2 */
+ 12229 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12233 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12235 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12237 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 853 8 first
+ 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12241 "00000000" // /* MW 3 */
+ 12242 "00101000" // /* MW 2 */
+ 12243 "00010000" // /* MW 1 */
+.delay_slot
+ 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "10100000" // /* MW 3 */
+ 12246 "00010000" // /* MW 2 */
+ 12247 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 821 18 first
+ 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00000101" // /* MW 3 */
+ 12258 "01100001" // /* MW 2 */
+ 12259 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 821 8
+ 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */
+ 12261 "00000001" // /* MW 5 */
+ 12262 "01000000" // /* MW 4 */
+ 12263 "00111000" // /* MW 3 */
+ 12264 "00011000" // /* MW 2 */
+ 12265 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12267 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12269 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12271 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12275 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 823 8 first
+ 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12277 "00000000" // /* MW 3 */
+ 12278 "00101000" // /* MW 2 */
+ 12279 "00010000" // /* MW 1 */
+.delay_slot
+ 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12281 "11111110" // /* MW 5 */
+ 12282 "00111111" // /* MW 4 */
+ 12283 "11110000" // /* MW 3 */
+ 12284 "11111111" // /* MW 2 */
+ 12285 "01111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "10000001" // /* MW 11 */
+ 12294 "10101101" // /* MW 10 */
+ 12295 "00000000" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */
+ 12305 "00000000" // /* MW 5 */
+ 12306 "00000000" // /* MW 4 */
+ 12307 "01111000" // /* MW 3 */
+ 12308 "00010111" // /* MW 2 */
+ 12309 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12311 "10100000" // /* MW 3 */
+ 12312 "01010001" // /* MW 2 */
+ 12313 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 864 16
+.delay_slot
+ 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12315 "10100000" // /* MW 3 */
+ 12316 "11011000" // /* MW 2 */
+ 12317 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12319 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12321 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12323 "00011100" // /* MW 13 */
+ 12324 "00000000" // /* MW 12 */
+ 12325 "00000000" // /* MW 11 */
+ 12326 "01010111" // /* MW 10 */
+ 12327 "00011010" // /* MW 9 */
+ 12328 "01000000" // /* MW 8 */
+ 12329 "00000000" // /* MW 7 */
+ 12330 "00000000" // /* MW 6 */
+ 12331 "10110110" // /* MW 5 */
+ 12332 "00000010" // /* MW 4 */
+ 12333 "11110000" // /* MW 3 */
+ 12334 "00101100" // /* MW 2 */
+ 12335 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */
+ 12337 "00000000" // /* MW 5 */
+ 12338 "00000000" // /* MW 4 */
+ 12339 "00110000" // /* MW 3 */
+ 12340 "00010111" // /* MW 2 */
+ 12341 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16
+.delay_slot
+ 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12343 "00100000" // /* MW 3 */
+ 12344 "00011000" // /* MW 2 */
+ 12345 "00011001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+.delay_slot
+ 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "01011100" // /* MW 2 */
+ 12349 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12351 "10100000" // /* MW 3 */
+ 12352 "01010011" // /* MW 2 */
+ 12353 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12357 "10000001" // /* MW 11 */
+ 12358 "10101101" // /* MW 10 */
+ 12359 "00000000" // /* MW 9 */
+ 12360 "00000000" // /* MW 8 */
+ 12361 "00000000" // /* MW 7 */
+ 12362 "00000000" // /* MW 6 */
+ 12363 "00100000" // /* MW 5 */
+ 12364 "00000000" // /* MW 4 */
+ 12365 "11110000" // /* MW 3 */
+ 12366 "00101100" // /* MW 2 */
+ 12367 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.src_ref 10 "softfloat.c" 834 27 first
+.tail_call
+ 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12369 "00000000" // /* MW 5 */
+ 12370 "00000000" // /* MW 4 */
+ 12371 "01000000" // /* MW 3 */
+ 12372 "00010100" // /* MW 2 */
+ 12373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12375 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12377 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12383 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.src_ref 10 "softfloat.c" 852 27 first
+.tail_call
+.return_address
+ 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12385 "00000000" // /* MW 5 */
+ 12386 "00000000" // /* MW 4 */
+ 12387 "01000000" // /* MW 3 */
+ 12388 "00010100" // /* MW 2 */
+ 12389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12399 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.src_ref 10 "softfloat.c" 821 34 first
+.tail_call
+.return_address
+ 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12401 "00000000" // /* MW 5 */
+ 12402 "00000000" // /* MW 4 */
+ 12403 "01000000" // /* MW 3 */
+ 12404 "00010100" // /* MW 2 */
+ 12405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14subFloat32Sigsjji__end
+ 12415 "00000000" // /* MW 1 */
+.label float32_add
+.function float32_add float32_add
+.src_ref 10 "softfloat.c" 92 12
+.src_ref 10 "softfloat.c" 878 first
+.function_start
+ 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "10000101" // /* MW 3 */
+ 12418 "11100000" // /* MW 2 */
+ 12419 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12 first
+ 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12421 "00001101" // /* MW 3 */
+ 12422 "01000111" // /* MW 2 */
+ 12423 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12
+ 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00001101" // /* MW 3 */
+ 12426 "10100001" // /* MW 2 */
+ 12427 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 15 first
+ 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00000111" // /* MW 3 */
+ 12430 "11100001" // /* MW 2 */
+ 12431 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 4
+ 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */
+ 12433 "00000001" // /* MW 5 */
+ 12434 "01000000" // /* MW 4 */
+ 12435 "01011000" // /* MW 3 */
+ 12436 "00011000" // /* MW 2 */
+ 12437 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 888 15 first
+.tail_call
+ 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */
+ 12449 "00000000" // /* MW 5 */
+ 12450 "00000000" // /* MW 4 */
+ 12451 "11001000" // /* MW 3 */
+ 12452 "00010110" // /* MW 2 */
+ 12453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12455 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12461 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12463 "00000000" // /* MW 1 */
+.label TGT_Ffloat32_add_48
+.src_ref 10 "softfloat.c" 885 15 first
+.tail_call
+.return_address
+ 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 12465 "00000000" // /* MW 5 */
+ 12466 "00000000" // /* MW 4 */
+ 12467 "10010000" // /* MW 3 */
+ 12468 "00010101" // /* MW 2 */
+ 12469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label float32_add__end
+ 12479 "00000000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2d1d5946a6747db932adeab9e7d141d4fd318d32
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/Release/3_3_reloadable12.txt
@@ -0,0 +1,2975 @@
+Contents of the .debug_line section:
+
+sigmoid_carf_templated_lut.h:
+File name Line number Starting address View Stmt
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 218 0xa10 x
+reduce_base_c8.h 220 0xa10 1 x
+reduce_base_c8.h 290 0xa10 2
+reduce_base_c8.h 348 0xa10 3
+reduce_base_c8.h 287 0xa1c
+reduce_base_c8.h 287 0xa1c 1
+reduce_base_c8.h 348 0xa1c 2 x
+reduce_base_c8.h 236 0xa26
+reduce_base_c8.h 293 0xa26 1
+reduce_base_c8.h 298 0xa26 2
+reduce_base_c8.h 299 0xa26 3
+reduce_base_c8.h 300 0xa26 4
+reduce_base_c8.h 326 0xa26 5
+reduce_base_c8.h 276 0xa30
+reduce_base_c8.h 301 0xa30 1
+reduce_base_c8.h 305 0xa30 2
+reduce_base_c8.h 218 0xa3a
+reduce_base_c8.h 280 0xa3a 1
+reduce_base_c8.h 312 0xa3a 2
+reduce_base_c8.h 298 0xa44 x
+reduce_base_c8.h 220 0xa4a x
+reduce_base_c8.h 221 0xa4e x
+reduce_base_c8.h 221 0xa5e
+reduce_base_c8.h 301 0xa5e 1 x
+reduce_base_c8.h 222 0xa64 x
+reduce_base_c8.h 293 0xa64 1 x
+reduce_base_c8.h 301 0xa64 2
+reduce_base_c8.h 290 0xa6e x
+reduce_base_c8.h 293 0xa72 x
+reduce_base_c8.h 290 0xa76 x
+reduce_base_c8.h 300 0xa76 1 x
+reduce_base_c8.h 222 0xa82 x
+reduce_base_c8.h 287 0xa82 1 x
+reduce_base_c8.h 223 0xa88 x
+reduce_base_c8.h 312 0xa88 1 x
+reduce_base_c8.h 305 0xa92 x
+reduce_base_c8.h 312 0xa96 x
+reduce_base_c8.h 299 0xa9a x
+reduce_base_c8.h 276 0xa9e x
+reduce_base_c8.h 299 0xa9e 1
+reduce_base_c8.h 276 0xaa4
+reduce_base_c8.h 301 0xaa8 x
+reduce_base_c8.h 223 0xaac x
+reduce_base_c8.h 236 0xaac 1 x
+reduce_base_c8.h 224 0xab2 x
+reduce_base_c8.h 224 0xac2
+reduce_base_c8.h 318 0xac2 1
+reduce_base_c8.h 225 0xaca x
+reduce_base_c8.h 225 0xada
+reduce_base_c8.h 318 0xada 1
+reduce_base_c8.h 226 0xae2 x
+reduce_base_c8.h 236 0xae8 x
+reduce_base_c8.h 312 0xaee x
+reduce_base_c8.h 318 0xaf2 x
+reduce_base_c8.h 300 0xaf6 x
+reduce_base_c8.h 305 0xaf6 1 x
+reduce_base_c8.h 280 0xafc x
+reduce_base_c8.h 226 0xb00 x
+reduce_base_c8.h 318 0xb00 1 x
+reduce_base_c8.h 236 0xb06
+reduce_base_c8.h 236 0xb0a x
+reduce_base_c8.h 236 0xb0e
+reduce_base_c8.h 242 0xb1c x
+reduce_base_c8.h 236 0xb20
+reduce_base_c8.h 236 0xb24 x
+reduce_base_c8.h 236 0xb28
+reduce_base_c8.h 236 0xb36
+reduce_base_c8.h 236 0xb3a
+reduce_base_c8.h 236 0xb3e
+reduce_base_c8.h 329 0xb54
+reduce_base_c8.h 236 0xb60
+reduce_base_c8.h 236 0xb64
+reduce_base_c8.h 236 0xb68
+reduce_base_c8.h 236 0xb76
+reduce_base_c8.h 316 0xb76 1
+reduce_base_c8.h 329 0xb76 2
+reduce_base_c8.h 236 0xb7a
+reduce_base_c8.h 236 0xb7e
+reduce_base_c8.h 236 0xb8e
+reduce_base_c8.h 236 0xb92
+reduce_base_c8.h 286 0xba2 x
+reduce_base_c8.h 289 0xba2 1
+reduce_base_c8.h 291 0xba2 2
+reduce_base_c8.h 291 0xba2 3
+reduce_base_c8.h 287 0xbba x
+reduce_base_c8.h 288 0xbca x
+reduce_base_c8.h 289 0xbda x
+reduce_base_c8.h 290 0xbea x
+reduce_base_c8.h 291 0xbfa x
+reduce_base_c8.h 292 0xc0e x
+reduce_base_c8.h 293 0xc12 x
+reduce_base_c8.h 274 0xc20 x
+reduce_base_c8.h 275 0xc20 1
+reduce_base_c8.h 275 0xc20 2
+reduce_base_c8.h 275 0xc2a x
+reduce_base_c8.h 279 0xc2a 1
+reduce_base_c8.h 275 0xc3e
+reduce_base_c8.h 276 0xc4e x
+reduce_base_c8.h 275 0xc5e x
+reduce_base_c8.h 277 0xc5e 1 x
+reduce_base_c8.h 278 0xc6e x
+reduce_base_c8.h 279 0xc7e x
+reduce_base_c8.h 279 0xc8c
+reduce_base_c8.h 281 0xc94 x
+reduce_base_c8.h 280 0xc98 x
+reduce_base_c8.h 236 0xca0
+reduce_base_c8.h 301 0xca0 1
+reduce_base_c8.h 302 0xca0 2
+reduce_base_c8.h 236 0xca6 x
+reduce_base_c8.h 236 0xcaa
+reduce_base_c8.h 298 0xcb0
+reduce_base_c8.h 303 0xcb0 1
+reduce_base_c8.h 310 0xcb0 2
+reduce_base_c8.h 311 0xcb0 3
+reduce_base_c8.h 236 0xcbc
+reduce_base_c8.h 236 0xcc0
+reduce_base_c8.h 236 0xcc4
+reduce_base_c8.h 310 0xcd4 x
+reduce_base_c8.h 312 0xcd4 1 x
+reduce_base_c8.h 315 0xcd4 2
+reduce_base_c8.h 313 0xcde
+reduce_base_c8.h 317 0xcde 1
+reduce_base_c8.h 315 0xce8
+reduce_base_c8.h 317 0xce8 1 x
+reduce_base_c8.h 311 0xcf6 x
+reduce_base_c8.h 312 0xd06 x
+reduce_base_c8.h 313 0xd16 x
+reduce_base_c8.h 315 0xd1a x
+reduce_base_c8.h 316 0xd2a x
+reduce_base_c8.h 317 0xd2e x
+reduce_base_c8.h 298 0xd50 x
+reduce_base_c8.h 301 0xd50 1
+reduce_base_c8.h 301 0xd50 2 x
+reduce_base_c8.h 302 0xd5a
+reduce_base_c8.h 303 0xd5a 1
+reduce_base_c8.h 306 0xd5a 2
+reduce_base_c8.h 302 0xd64 x
+reduce_base_c8.h 302 0xd68
+reduce_base_c8.h 306 0xd68 1 x
+reduce_base_c8.h 299 0xd74 x
+reduce_base_c8.h 300 0xd84 x
+reduce_base_c8.h 301 0xd94 x
+reduce_base_c8.h 302 0xda4 x
+reduce_base_c8.h 303 0xdb4 x
+reduce_base_c8.h 304 0xdc4 x
+reduce_base_c8.h 305 0xdc8 x
+reduce_base_c8.h 326 0xde0 x
+reduce_base_c8.h 329 0xde0 1
+reduce_base_c8.h 329 0xde6
+reduce_base_c8.h 330 0xde6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 139 0xde6 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 331 0xdf0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 140 0xdf0 1 x
+reduce_mean_c8_impl.h 141 0xdf6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 326 0xdfe x
+reduce_base_c8.h 327 0xe0e x
+reduce_base_c8.h 327 0xe1a
+reduce_base_c8.h 328 0xe1a 1
+reduce_base_c8.h 328 0xe20 x
+reduce_base_c8.h 329 0xe24 x
+reduce_base_c8.h 329 0xe32
+reduce_base_c8.h 329 0xe36
+reduce_base_c8.h 330 0xe36 1
+reduce_base_c8.h 329 0xe3c
+reduce_base_c8.h 330 0xe48 x
+reduce_base_c8.h 331 0xe58 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 139 0xe68 x
+reduce_mean_c8_impl.h 140 0xe78 x
+reduce_mean_c8_impl.h 141 0xe88 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 349 0xe8c x
+reduce_base_c8.h 349 0xe90
+reduce_base_c8.h 262 0xea0 x
+reduce_base_c8.h 263 0xea0 1
+reduce_base_c8.h 263 0xeaa
+reduce_base_c8.h 263 0xeaa 1 x
+reduce_base_c8.h 267 0xeaa 2
+reduce_base_c8.h 265 0xeb4
+reduce_base_c8.h 329 0xeb4 1
+reduce_base_c8.h 265 0xebe x
+reduce_base_c8.h 265 0xec2
+reduce_base_c8.h 267 0xec2 1 x
+reduce_base_c8.h 265 0xec6 x
+reduce_base_c8.h 265 0xec6 1 x
+reduce_base_c8.h 263 0xecc x
+reduce_base_c8.h 263 0xed0
+reduce_base_c8.h 264 0xede x
+reduce_base_c8.h 265 0xeee x
+reduce_base_c8.h 266 0xefe x
+reduce_base_c8.h 267 0xf0e x
+reduce_base_c8.h 267 0xf1c
+reduce_base_c8.h 267 0xf20
+reduce_base_c8.h 270 0xf24
+reduce_base_c8.h 268 0xf28 x
+reduce_base_c8.h 269 0xf30 x
+reduce_base_c8.h 270 0xf30 1 x
+reduce_base_c8.h 250 0xf40
+reduce_base_c8.h 250 0xf40 1 x
+reduce_base_c8.h 255 0xf40 2
+reduce_base_c8.h 255 0xf4a
+reduce_base_c8.h 255 0xf4a 1
+reduce_base_c8.h 255 0xf4a 2
+reduce_base_c8.h 255 0xf4a 3 x
+reduce_base_c8.h 255 0xf54
+reduce_base_c8.h 255 0xf54 1
+reduce_base_c8.h 329 0xf54 2
+reduce_base_c8.h 251 0xf62 x
+reduce_base_c8.h 252 0xf72 x
+reduce_base_c8.h 253 0xf82 x
+reduce_base_c8.h 254 0xf92 x
+reduce_base_c8.h 255 0xfa2 x
+reduce_base_c8.h 255 0xfb0
+reduce_base_c8.h 255 0xfb0 1
+reduce_base_c8.h 256 0xfb8 x
+reduce_base_c8.h 257 0xfbc x
+reduce_base_c8.h 238 0xfc0 x
+reduce_base_c8.h 239 0xfd0 x
+reduce_base_c8.h 240 0xfe0 x
+reduce_base_c8.h 241 0xfea
+reduce_base_c8.h 241 0xfea 1
+reduce_base_c8.h 241 0xff2 x
+reduce_base_c8.h 241 0xff8
+reduce_base_c8.h 241 0xffe
+reduce_base_c8.h 241 0x1002
+reduce_base_c8.h 241 0x1002 1
+reduce_base_c8.h 241 0x1002 2
+reduce_base_c8.h 241 0x1002 3
+reduce_base_c8.h 242 0x100c x
+reduce_base_c8.h 243 0x101a
+reduce_base_c8.h 243 0x101e x
+reduce_base_c8.h 243 0x102c
+reduce_base_c8.h 243 0x102c 1
+reduce_base_c8.h 243 0x102c 2
+reduce_base_c8.h 243 0x102c 3
+reduce_base_c8.h 244 0x1036 x
+reduce_base_c8.h 245 0x103a x
+reduce_base_c8.h 329 0x103a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 266 0x1050 x
+pad_3d.h 465 0x1050 1 x
+pad_3d.h 468 0x1050 2 x
+pad_3d.h 471 0x1050 3
+pad_3d.h 479 0x1050 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x105a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 469 0x105a 1 x
+pad_3d.h 478 0x105a 2
+pad_3d.h 499 0x105a 3
+pad_3d.h 511 0x105a 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x1064
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 470 0x1064 1 x
+pad_3d.h 486 0x1064 2
+pad_3d.h 498 0x1064 3
+pad_3d.h 499 0x1064 4
+pad_3d.h 509 0x1064 5
+pad_3d.h 517 0x1064 6
+pad_3d.h 471 0x106e x
+pad_3d.h 472 0x1072 x
+pad_3d.h 473 0x1076 x
+pad_3d.h 475 0x107a x
+pad_3d.h 479 0x107e x
+pad_3d.h 477 0x1082 x
+pad_3d.h 478 0x1086 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x108a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 485 0x1090 x
+pad_3d.h 485 0x1094
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 998 0x1098 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 486 0x109c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x10a0 x
+array_helpers.hpp 950 0x10a4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 486 0x10a4 1 x
+pad_3d.h 486 0x10aa
+pad_3d.h 487 0x10b0
+pad_3d.h 486 0x10bc
+pad_3d.h 486 0x10c2
+pad_3d.h 486 0x10c8
+pad_3d.h 487 0x1130 x
+pad_3d.h 495 0x1140
+pad_3d.h 495 0x1140 1 x
+pad_3d.h 498 0x1140 2
+pad_3d.h 499 0x1140 3 x
+pad_3d.h 495 0x114a
+pad_3d.h 496 0x114a 1 x
+pad_3d.h 495 0x1150 x
+pad_3d.h 495 0x1154
+pad_3d.h 498 0x1154 1 x
+pad_3d.h 499 0x115a x
+pad_3d.h 498 0x115e x
+pad_3d.h 498 0x1162
+pad_3d.h 499 0x1162 1 x
+pad_3d.h 499 0x1168
+pad_3d.h 499 0x116c
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x117c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x117c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 499 0x117c 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1186
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1186 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 499 0x1186 2
+pad_3d.h 499 0x1190
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1200 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1200 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 514 0x1210
+pad_3d.h 514 0x1216 x
+pad_3d.h 514 0x121a
+pad_3d.h 514 0x121e
+pad_3d.h 511 0x1222 x
+pad_3d.h 509 0x1226 x
+pad_3d.h 515 0x122a x
+pad_3d.h 509 0x122e x
+pad_3d.h 509 0x1232
+pad_3d.h 514 0x1232 1
+pad_3d.h 517 0x1232 2 x
+pad_3d.h 509 0x1238 x
+pad_3d.h 509 0x123c
+pad_3d.h 517 0x123c 1 x
+pad_3d.h 517 0x1242
+pad_3d.h 514 0x124c x
+pad_3d.h 514 0x1250
+pad_3d.h 515 0x1254 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1258
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1258 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 517 0x1258 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1262
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1262 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 517 0x1262 2
+pad_3d.h 517 0x126c
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x12d0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x12d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 282 0x12e0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 352 0x12f0
+reduce_base_c8.h 362 0x12f0 1 x
+reduce_base_c8.h 365 0x12f0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x12f0 3
+reduce_mean_c8_impl.h 223 0x12f0 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 365 0x12f4 x
+reduce_base_c8.h 367 0x12fc x
+reduce_base_c8.h 367 0x130c
+reduce_base_c8.h 367 0x130c 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 101 0x1312
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1312 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1312 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 372 0x1312 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1316 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 362 0x131e
+reduce_base_c8.h 372 0x1324
+reduce_base_c8.h 372 0x1328 x
+reduce_base_c8.h 372 0x1338
+reduce_base_c8.h 372 0x133c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1342
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 374 0x1342 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x134e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 372 0x134e 1
+reduce_base_c8.h 374 0x134e 2
+reduce_base_c8.h 372 0x135a
+reduce_base_c8.h 372 0x1360
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x13d0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 374 0x13d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x13e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x13e0 1
+reduce_base_c8.h 412 0x13e0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x13e0 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x13e4 x
+reduce_base_c8.h 388 0x13e8
+reduce_base_c8.h 388 0x13e8 1
+reduce_base_c8.h 388 0x13ee
+reduce_base_c8.h 570 0x13ee 1
+reduce_base_c8.h 570 0x13ee 2
+reduce_base_c8.h 570 0x13ee 3
+reduce_base_c8.h 570 0x13f4 x
+reduce_base_c8.h 594 0x13f4 1
+reduce_base_c8.h 570 0x13fa
+reduce_base_c8.h 594 0x13fa 1 x
+reduce_base_c8.h 594 0x1400
+reduce_base_c8.h 594 0x1404
+reduce_base_c8.h 388 0x1408
+reduce_base_c8.h 595 0x1408 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x140e
+aie_core.h 73 0x140e 1
+aie_core.h 90 0x140e 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x140e 3
+vector.hpp 1139 0x140e 4
+vector.hpp 1159 0x140e 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x140e 6
+accum.hpp 198 0x140e 7
+accum.hpp 198 0x140e 8
+accum.hpp 943 0x140e 9
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x140e 10
+reduce_base_c8.h 596 0x140e 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1418
+aie_core.h 90 0x1418 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1418 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1418 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x1418 4 x
+reduce_base_c8.h 570 0x1418 5
+reduce_base_c8.h 570 0x1418 6
+reduce_base_c8.h 570 0x1418 7
+reduce_base_c8.h 570 0x1418 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1424
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 109 0x1424 1
+me_vmult_float_emulated.h 109 0x1424 2
+me_vmult_float_emulated.h 111 0x1424 3
+me_vmult_float_emulated.h 111 0x1424 4
+me_vmult_float_emulated.h 113 0x1424 5
+me_vmult_float_emulated.h 113 0x1424 6
+me_vmult_float_emulated.h 115 0x1424 7
+me_vmult_float_emulated.h 115 0x1424 8
+me_vmult_float_emulated.h 117 0x1424 9
+me_vmult_float_emulated.h 117 0x1424 10
+me_vmult_float_emulated.h 118 0x1424 11
+me_vmult_float_emulated.h 118 0x1424 12
+me_vmult_float_emulated.h 118 0x1424 13
+me_vmult_float_emulated.h 118 0x1424 14
+me_vmult_float_emulated.h 119 0x1424 15
+me_vmult_float_emulated.h 119 0x1424 16
+me_vmult_float_emulated.h 119 0x1424 17
+me_vmult_float_emulated.h 119 0x1424 18
+me_vmult_float_emulated.h 120 0x1424 19
+me_vmult_float_emulated.h 120 0x1424 20
+me_vmult_float_emulated.h 120 0x1424 21
+me_vmult_float_emulated.h 120 0x1424 22
+me_vmult_float_emulated.h 121 0x1424 23
+me_vmult_float_emulated.h 121 0x1424 24
+me_vmult_float_emulated.h 121 0x1424 25
+me_vmult_float_emulated.h 121 0x1424 26
+me_vmult_float_emulated.h 122 0x1424 27
+me_vmult_float_emulated.h 122 0x1424 28
+me_vmult_float_emulated.h 122 0x1424 29
+me_vmult_float_emulated.h 122 0x1424 30
+me_vmult_float_emulated.h 123 0x1424 31
+me_vmult_float_emulated.h 123 0x1424 32
+me_vmult_float_emulated.h 123 0x1424 33
+me_vmult_float_emulated.h 123 0x1424 34
+me_vmult_float_emulated.h 124 0x1424 35
+me_vmult_float_emulated.h 124 0x1424 36
+me_vmult_float_emulated.h 124 0x1424 37
+me_vmult_float_emulated.h 124 0x1424 38
+me_vmult_float_emulated.h 125 0x1424 39
+me_vmult_float_emulated.h 125 0x1424 40
+me_vmult_float_emulated.h 125 0x1424 41
+me_vmult_float_emulated.h 125 0x1424 42
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1424 43
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1424 44
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x1424 45
+add.hpp 28 0x1424 46
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1424 47
+add_reduce.hpp 324 0x1424 48
+add_reduce.hpp 324 0x1424 49
+add_reduce.hpp 324 0x1424 50
+add_reduce.hpp 324 0x1424 51
+add_reduce.hpp 324 0x1424 52
+add_reduce.hpp 324 0x1424 53
+add_reduce.hpp 324 0x1424 54
+add_reduce.hpp 324 0x1424 55
+add_reduce.hpp 324 0x1424 56
+add_reduce.hpp 324 0x1424 57
+add_reduce.hpp 324 0x1424 58
+add_reduce.hpp 324 0x1424 59
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1424 60
+add_accum.hpp 19 0x1424 61
+add_accum.hpp 19 0x1424 62
+add_accum.hpp 19 0x1424 63
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x1424 64
+reduce_base_c8.h 595 0x1424 65 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1430
+aie_core.h 73 0x1430 1
+aie_core.h 73 0x1430 2
+aie_core.h 73 0x1430 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1430 4
+vector.hpp 1139 0x1430 5
+vector.hpp 1139 0x1430 6
+vector.hpp 1159 0x1430 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x1430 8
+accum.hpp 198 0x1430 9
+accum.hpp 198 0x1430 10
+accum.hpp 198 0x1430 11
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1430 12 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x1430 13
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x143c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x143c 1
+vector.hpp 1139 0x143c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x143c 3
+accum.hpp 198 0x143c 4 x
+accum.hpp 943 0x143c 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x143c 6
+reduce_base_c8.h 570 0x143c 7
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1446 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1446 1
+vector.hpp 1139 0x1446 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1446 3
+accum.hpp 198 0x1446 4
+accum.hpp 943 0x1446 5
+accum.hpp 943 0x1446 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1446 7
+reduce_base_c8.h 570 0x1446 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1450
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x1450 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1456
+aie_core.h 90 0x1456 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1456 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1456 3
+accum.hpp 943 0x1456 4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x145c
+aie_core.h 90 0x145c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x145c 2
+vector.hpp 1139 0x145c 3
+vector.hpp 1139 0x145c 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x145c 5
+accum.hpp 198 0x145c 6
+accum.hpp 198 0x145c 7 x
+accum.hpp 943 0x145c 8
+accum.hpp 943 0x145c 9
+accum.hpp 943 0x145c 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x145c 11 x
+reduce_base_c8.h 570 0x145c 12 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1468
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1468 1
+vector.hpp 1139 0x1468 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1468 3
+accum.hpp 198 0x1468 4
+accum.hpp 943 0x1468 5
+accum.hpp 943 0x1468 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1468 7 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1468 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1472
+aie_core.h 90 0x1472 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1472 2
+vector.hpp 1139 0x1472 3
+vector.hpp 1139 0x1472 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1472 5
+accum.hpp 198 0x1472 6
+accum.hpp 198 0x1472 7 x
+accum.hpp 943 0x1472 8
+accum.hpp 943 0x1472 9
+accum.hpp 943 0x1472 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1472 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x147c
+aie_core.h 90 0x147c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x147c 2
+vector.hpp 1159 0x147c 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x147c 4
+accum.hpp 198 0x147c 5
+accum.hpp 943 0x147c 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1482 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1482 1 x
+accum.hpp 943 0x1482 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1482 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1482 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x148a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x148a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x148a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1490 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x1490 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x149a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x149a 1 x
+accum.hpp 943 0x149a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x149a 3
+reduce_base_c8.h 570 0x149a 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14a0
+aie_core.h 73 0x14a0 1
+aie_core.h 73 0x14a0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x14a0 3
+vector.hpp 1159 0x14a0 4
+vector.hpp 1159 0x14a0 5
+vector.hpp 1285 0x14a0 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x14a0 7
+accum.hpp 153 0x14a0 8
+accum.hpp 153 0x14a0 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x14a0 10
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14a0 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14b0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14b0 1 x
+vector.hpp 1159 0x14b0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x14b0 3
+accum.hpp 198 0x14b0 4 x
+accum.hpp 943 0x14b0 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14b0 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x14b0 7 x
+reduce_base_c8.h 570 0x14b0 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x14c0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14c0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14c0 2
+accum.hpp 943 0x14c0 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14c4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14c4 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14c4 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14d0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14d0 1
+accum.hpp 943 0x14d0 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x14d0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14f0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1500 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1500 1 x
+vector.hpp 1159 0x1500 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x1500 3 x
+accum.hpp 198 0x1500 4 x
+accum.hpp 943 0x1500 5 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1500 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1510 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1520 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1520 1 x
+accum.hpp 943 0x1520 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1520 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1520 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 107 0x1530
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x1530 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x153a x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 107 0x153a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x153a 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x153a 3 x
+accum.hpp 943 0x153a 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x153a 5 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 101 0x1544 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x154a x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x154e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x154e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x154e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1554 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x1554 1 x
+reduce_base_c8.h 412 0x155c
+reduce_base_c8.h 412 0x1560
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x156c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x156c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x156c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1572 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1572 1 x
+reduce_mean_c8_impl.h 184 0x1572 2
+reduce_mean_c8_impl.h 184 0x1584
+reduce_mean_c8_impl.h 184 0x1588
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x158e
+add_reduce.hpp 322 0x158e 1
+add_reduce.hpp 322 0x158e 2
+add_reduce.hpp 322 0x158e 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x158e 4
+reduce_mean_c8_impl.h 184 0x159a
+reduce_mean_c8_impl.h 184 0x159e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x15ae
+blend.hpp 170 0x15b4
+blend.hpp 163 0x15ba
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 109 0x15c0
+me_vmult_float_emulated.h 111 0x15c0 1
+me_vmult_float_emulated.h 113 0x15c0 2
+me_vmult_float_emulated.h 115 0x15c0 3
+me_vmult_float_emulated.h 117 0x15c0 4
+me_vmult_float_emulated.h 118 0x15c0 5
+me_vmult_float_emulated.h 118 0x15c0 6
+me_vmult_float_emulated.h 119 0x15c0 7
+me_vmult_float_emulated.h 119 0x15c0 8
+me_vmult_float_emulated.h 120 0x15c0 9
+me_vmult_float_emulated.h 120 0x15c0 10
+me_vmult_float_emulated.h 121 0x15c0 11
+me_vmult_float_emulated.h 121 0x15c0 12
+me_vmult_float_emulated.h 122 0x15c0 13
+me_vmult_float_emulated.h 122 0x15c0 14
+me_vmult_float_emulated.h 123 0x15c0 15
+me_vmult_float_emulated.h 123 0x15c0 16
+me_vmult_float_emulated.h 124 0x15c0 17
+me_vmult_float_emulated.h 124 0x15c0 18
+me_vmult_float_emulated.h 125 0x15c0 19
+me_vmult_float_emulated.h 125 0x15c0 20
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x15c0 21
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x15c0 22
+add_reduce.hpp 324 0x15c0 23
+add_reduce.hpp 324 0x15c0 24
+add_reduce.hpp 324 0x15c0 25
+add_reduce.hpp 324 0x15c0 26
+add_reduce.hpp 324 0x15c0 27
+add_reduce.hpp 324 0x15c0 28
+add_reduce.hpp 324 0x15c0 29
+add_reduce.hpp 324 0x15c0 30
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x15c0 31
+reduce_mean_c8_impl.h 200 0x15c0 32
+reduce_mean_c8_impl.h 200 0x15c0 33
+reduce_mean_c8_impl.h 223 0x15c0 34
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x15cc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x15cc 1 x
+reduce_mean_c8_impl.h 200 0x15e0
+reduce_mean_c8_impl.h 223 0x15f0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x15fc
+vector.hpp 1289 0x15fc 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x1608
+me_vmult_float_emulated.h 112 0x1608 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1608 2
+vector.hpp 57 0x1608 3
+vector.hpp 1280 0x1608 4
+vector.hpp 1285 0x1608 5
+vector.hpp 1287 0x1608 6
+vector.hpp 1288 0x1608 7
+vector.hpp 1289 0x1608 8
+vector.hpp 1292 0x1608 9
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x1608 10 x
+reduce_mean_c8_impl.h 268 0x1608 11
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1614
+vector.hpp 915 0x1614 1
+vector.hpp 1280 0x1614 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x161e
+add_reduce.hpp 322 0x161e 1
+add_reduce.hpp 322 0x161e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x161e 3 x
+reduce_mean_c8_impl.h 223 0x1628
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x1632
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1638
+me_vmult_float_emulated.h 108 0x1638 1
+me_vmult_float_emulated.h 109 0x1638 2
+me_vmult_float_emulated.h 110 0x1638 3
+me_vmult_float_emulated.h 110 0x1638 4
+me_vmult_float_emulated.h 111 0x1638 5
+me_vmult_float_emulated.h 111 0x1638 6
+me_vmult_float_emulated.h 111 0x1638 7
+me_vmult_float_emulated.h 112 0x1638 8
+me_vmult_float_emulated.h 112 0x1638 9
+me_vmult_float_emulated.h 113 0x1638 10
+me_vmult_float_emulated.h 114 0x1638 11
+me_vmult_float_emulated.h 114 0x1638 12
+me_vmult_float_emulated.h 115 0x1638 13
+me_vmult_float_emulated.h 115 0x1638 14
+me_vmult_float_emulated.h 115 0x1638 15
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x1638 16
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x1638 17
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x163c x
+me_vmult_float_emulated.h 112 0x163c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x163c 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1644
+me_vmult_float_emulated.h 108 0x1644 1
+me_vmult_float_emulated.h 109 0x1644 2
+me_vmult_float_emulated.h 110 0x1644 3
+me_vmult_float_emulated.h 110 0x1644 4
+me_vmult_float_emulated.h 111 0x1644 5
+me_vmult_float_emulated.h 111 0x1644 6
+me_vmult_float_emulated.h 111 0x1644 7
+me_vmult_float_emulated.h 113 0x1644 8
+me_vmult_float_emulated.h 114 0x1644 9
+me_vmult_float_emulated.h 114 0x1644 10
+me_vmult_float_emulated.h 115 0x1644 11
+me_vmult_float_emulated.h 115 0x1644 12
+me_vmult_float_emulated.h 115 0x1644 13
+me_vmult_float_emulated.h 108 0x1648
+me_vmult_float_emulated.h 108 0x1648 1
+me_vmult_float_emulated.h 109 0x1648 2
+me_vmult_float_emulated.h 110 0x1648 3
+me_vmult_float_emulated.h 110 0x1648 4
+me_vmult_float_emulated.h 111 0x1648 5
+me_vmult_float_emulated.h 111 0x1648 6
+me_vmult_float_emulated.h 111 0x1648 7
+me_vmult_float_emulated.h 113 0x1648 8 x
+me_vmult_float_emulated.h 115 0x1648 9
+me_vmult_float_emulated.h 115 0x1648 10
+me_vmult_float_emulated.h 115 0x1648 11
+me_vmult_float_emulated.h 108 0x1650
+me_vmult_float_emulated.h 108 0x1650 1
+me_vmult_float_emulated.h 109 0x1650 2
+me_vmult_float_emulated.h 110 0x1650 3
+me_vmult_float_emulated.h 110 0x1650 4
+me_vmult_float_emulated.h 111 0x1650 5
+me_vmult_float_emulated.h 111 0x1650 6
+me_vmult_float_emulated.h 111 0x1650 7
+me_vmult_float_emulated.h 113 0x165c
+me_vmult_float_emulated.h 114 0x165c 1 x
+me_vmult_float_emulated.h 114 0x165c 2 x
+me_vmult_float_emulated.h 115 0x1662 x
+me_vmult_float_emulated.h 115 0x1670
+me_vmult_float_emulated.h 115 0x1670 1
+me_vmult_float_emulated.h 115 0x1670 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1670 3
+add_reduce.hpp 322 0x1670 4
+add_reduce.hpp 322 0x1670 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 226 0x1680 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1280 0x168a
+vector.hpp 1280 0x168e x
+vector.hpp 1285 0x1692 x
+vector.hpp 1285 0x1692 1 x
+vector.hpp 1285 0x1698
+vector.hpp 1286 0x169c x
+vector.hpp 1285 0x16a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16a6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16aa x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16aa 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16ae
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ae 1 x
+accum.hpp 199 0x16ba x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x16ba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x16c2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16c6 x
+vector.hpp 243 0x16c6 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16c6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ce x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16d2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16d6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16d6 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16de
+accum.hpp 151 0x16e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 243 0x16e6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 151 0x16e6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16ea x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ee x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16ee 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16fa
+add_reduce.hpp 322 0x16fe x
+add_reduce.hpp 324 0x1702 x
+add_reduce.hpp 324 0x1702 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x170a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x170e x
+add_reduce.hpp 324 0x170e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1716 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x171a x
+add_reduce.hpp 322 0x171e x
+add_reduce.hpp 324 0x171e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1726 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x172a x
+add_reduce.hpp 322 0x172e x
+add_reduce.hpp 324 0x172e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1736 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x173a x
+add_reduce.hpp 322 0x173e x
+add_reduce.hpp 324 0x1742 x
+add_reduce.hpp 324 0x1742 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x174a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x174e x
+add_reduce.hpp 324 0x174e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1756 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x175a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x175e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1762 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1766 x
+vector.hpp 1288 0x1766 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x176c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1770 x
+vector.hpp 1287 0x1770 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1770 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1776 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 853 0x177a x
+vector.hpp 853 0x177e
+vector.hpp 142 0x1782 x
+vector.hpp 1413 0x1782 1 x
+vector.hpp 142 0x1786
+vector.hpp 1413 0x1786 1
+vector.hpp 142 0x178a
+vector.hpp 1413 0x178a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x178e x
+blend.hpp 170 0x1792
+blend.hpp 170 0x1796
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x179a
+me_vmult_float_emulated.h 108 0x179a 1
+me_vmult_float_emulated.h 108 0x179e
+me_vmult_float_emulated.h 108 0x179e 1
+me_vmult_float_emulated.h 109 0x179e 2
+me_vmult_float_emulated.h 110 0x179e 3
+me_vmult_float_emulated.h 110 0x179e 4
+me_vmult_float_emulated.h 111 0x179e 5
+me_vmult_float_emulated.h 111 0x179e 6
+me_vmult_float_emulated.h 111 0x179e 7
+me_vmult_float_emulated.h 108 0x17a2 x
+me_vmult_float_emulated.h 108 0x17a2 1 x
+me_vmult_float_emulated.h 109 0x17a2 2 x
+me_vmult_float_emulated.h 108 0x17aa
+me_vmult_float_emulated.h 108 0x17aa 1
+me_vmult_float_emulated.h 109 0x17aa 2
+me_vmult_float_emulated.h 110 0x17aa 3
+me_vmult_float_emulated.h 110 0x17aa 4
+me_vmult_float_emulated.h 111 0x17aa 5
+me_vmult_float_emulated.h 111 0x17aa 6
+me_vmult_float_emulated.h 111 0x17aa 7
+me_vmult_float_emulated.h 109 0x17ae
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17ae 1
+vector.hpp 1285 0x17ae 2 x
+vector.hpp 1289 0x17ae 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 120 0x17b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17b8 1
+vector.hpp 1289 0x17b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x17c0
+me_vmult_float_emulated.h 108 0x17c0 1
+me_vmult_float_emulated.h 109 0x17c0 2
+me_vmult_float_emulated.h 110 0x17c0 3
+me_vmult_float_emulated.h 110 0x17c0 4
+me_vmult_float_emulated.h 111 0x17c0 5
+me_vmult_float_emulated.h 111 0x17c0 6
+me_vmult_float_emulated.h 111 0x17c0 7
+me_vmult_float_emulated.h 124 0x17c0 8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17c0 9 x
+vector.hpp 1289 0x17c0 10
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 125 0x17ca x
+me_vmult_float_emulated.h 109 0x17d2 x
+me_vmult_float_emulated.h 110 0x17d2 1 x
+me_vmult_float_emulated.h 110 0x17d2 2 x
+me_vmult_float_emulated.h 111 0x17d8 x
+me_vmult_float_emulated.h 111 0x17e6
+me_vmult_float_emulated.h 111 0x17e6 1
+me_vmult_float_emulated.h 111 0x17e6 2
+me_vmult_float_emulated.h 117 0x17ec x
+me_vmult_float_emulated.h 118 0x17f0 x
+me_vmult_float_emulated.h 119 0x17fa x
+me_vmult_float_emulated.h 117 0x17fe x
+me_vmult_float_emulated.h 118 0x1802 x
+me_vmult_float_emulated.h 118 0x1806
+me_vmult_float_emulated.h 122 0x1810 x
+me_vmult_float_emulated.h 118 0x1814 x
+me_vmult_float_emulated.h 119 0x1818 x
+me_vmult_float_emulated.h 119 0x181c
+me_vmult_float_emulated.h 121 0x1826 x
+me_vmult_float_emulated.h 119 0x182a x
+me_vmult_float_emulated.h 120 0x182e x
+me_vmult_float_emulated.h 120 0x1832
+me_vmult_float_emulated.h 123 0x183c x
+me_vmult_float_emulated.h 120 0x1840 x
+me_vmult_float_emulated.h 121 0x1844 x
+me_vmult_float_emulated.h 121 0x1848
+me_vmult_float_emulated.h 121 0x1854
+me_vmult_float_emulated.h 122 0x1858 x
+me_vmult_float_emulated.h 122 0x185c
+me_vmult_float_emulated.h 122 0x1868
+me_vmult_float_emulated.h 123 0x186c x
+me_vmult_float_emulated.h 123 0x1870
+me_vmult_float_emulated.h 123 0x187c
+me_vmult_float_emulated.h 124 0x1880 x
+me_vmult_float_emulated.h 124 0x1884
+me_vmult_float_emulated.h 124 0x1890
+me_vmult_float_emulated.h 125 0x1894 x
+me_vmult_float_emulated.h 125 0x1898
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x18a4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x18a4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x18aa
+vector.hpp 1289 0x18ae x
+vector.hpp 57 0x18b4 x
+vector.hpp 1292 0x18b4 1 x
+vector.hpp 57 0x18c0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x18c0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 352 0x18f0 x
+reduce_base_c8.h 352 0x18f4
+reduce_base_c8.h 352 0x18fe
+reduce_base_c8.h 353 0x1902 x
+reduce_base_c8.h 352 0x190e x
+reduce_base_c8.h 352 0x1912
+reduce_base_c8.h 420 0x1920
+reduce_base_c8.h 353 0x1928 x
+reduce_base_c8.h 420 0x192c x
+reduce_base_c8.h 420 0x1938
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1950
+blend.hpp 170 0x195a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1970
+reduce_mean_c8_impl.h 184 0x1974 x
+reduce_mean_c8_impl.h 184 0x1978
+reduce_mean_c8_impl.h 184 0x1988
+reduce_mean_c8_impl.h 184 0x198c
+reduce_mean_c8_impl.h 184 0x1990
+reduce_mean_c8_impl.h 200 0x1996
+reduce_mean_c8_impl.h 200 0x19b0 x
+reduce_mean_c8_impl.h 202 0x19b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x19ba
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 202 0x19ba 1 x
+reduce_mean_c8_impl.h 202 0x19c0
+reduce_mean_c8_impl.h 200 0x19ce x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x19d2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19d2 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 206 0x19d2 2 x
+reduce_mean_c8_impl.h 206 0x19d2 3
+reduce_mean_c8_impl.h 209 0x19d2 4
+reduce_mean_c8_impl.h 206 0x19de
+reduce_mean_c8_impl.h 206 0x19de 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x19ea x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19ea 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 209 0x19ea 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19f0
+accum.hpp 199 0x19f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x19f6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 206 0x1a00 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1a10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x1a10 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 209 0x1a10 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 150 0x1a50 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x1a60 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x1a70 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1a80
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1a80 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1a8a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1a8a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1a8a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1a94
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 150 0x1a9a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1aa0
+add_reduce.hpp 322 0x1aa4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1aa8
+me_vmult_float_emulated.h 108 0x1aa8 1
+me_vmult_float_emulated.h 109 0x1aa8 2
+me_vmult_float_emulated.h 110 0x1aa8 3
+me_vmult_float_emulated.h 110 0x1aa8 4
+me_vmult_float_emulated.h 111 0x1aa8 5
+me_vmult_float_emulated.h 111 0x1aa8 6
+me_vmult_float_emulated.h 111 0x1aa8 7
+me_vmult_float_emulated.h 112 0x1aa8 8
+me_vmult_float_emulated.h 112 0x1aa8 9
+me_vmult_float_emulated.h 113 0x1aa8 10
+me_vmult_float_emulated.h 114 0x1aa8 11
+me_vmult_float_emulated.h 114 0x1aa8 12
+me_vmult_float_emulated.h 115 0x1aa8 13
+me_vmult_float_emulated.h 115 0x1aa8 14
+me_vmult_float_emulated.h 115 0x1aa8 15
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1aa8 16
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1aa8 17 x
+accum.hpp 1108 0x1aa8 18
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1aa8 19 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1ab2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1ab6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x1aba
+me_vmult_float_emulated.h 112 0x1aba 1
+me_vmult_float_emulated.h 113 0x1aba 2
+me_vmult_float_emulated.h 113 0x1ac0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1ac4 x
+add_reduce.hpp 322 0x1ac8 x
+add_reduce.hpp 324 0x1ac8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1ad0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1ad4
+me_vmult_float_emulated.h 108 0x1ad4 1
+me_vmult_float_emulated.h 109 0x1ad4 2
+me_vmult_float_emulated.h 110 0x1ad4 3
+me_vmult_float_emulated.h 110 0x1ad4 4
+me_vmult_float_emulated.h 111 0x1ad4 5
+me_vmult_float_emulated.h 111 0x1ad4 6
+me_vmult_float_emulated.h 111 0x1ad4 7
+me_vmult_float_emulated.h 113 0x1ad4 8
+me_vmult_float_emulated.h 114 0x1ad4 9
+me_vmult_float_emulated.h 114 0x1ad4 10
+me_vmult_float_emulated.h 115 0x1ad4 11
+me_vmult_float_emulated.h 115 0x1ad4 12
+me_vmult_float_emulated.h 115 0x1ad4 13
+me_vmult_float_emulated.h 112 0x1ada x
+me_vmult_float_emulated.h 112 0x1ada 1 x
+me_vmult_float_emulated.h 113 0x1ae0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1ae0 1 x
+add_reduce.hpp 322 0x1ae8 x
+add_reduce.hpp 324 0x1ae8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1af0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1af4
+me_vmult_float_emulated.h 108 0x1af4 1
+me_vmult_float_emulated.h 109 0x1af4 2
+me_vmult_float_emulated.h 110 0x1af4 3
+me_vmult_float_emulated.h 110 0x1af4 4
+me_vmult_float_emulated.h 111 0x1af4 5
+me_vmult_float_emulated.h 111 0x1af4 6
+me_vmult_float_emulated.h 111 0x1af4 7
+me_vmult_float_emulated.h 115 0x1af4 8
+me_vmult_float_emulated.h 115 0x1af4 9
+me_vmult_float_emulated.h 115 0x1af4 10
+me_vmult_float_emulated.h 113 0x1afc x
+me_vmult_float_emulated.h 114 0x1afc 1 x
+me_vmult_float_emulated.h 114 0x1afc 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1b00 x
+add_reduce.hpp 322 0x1b04 x
+add_reduce.hpp 324 0x1b04 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1b0c x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1b10
+me_vmult_float_emulated.h 108 0x1b10 1
+me_vmult_float_emulated.h 109 0x1b10 2
+me_vmult_float_emulated.h 110 0x1b10 3
+me_vmult_float_emulated.h 110 0x1b10 4
+me_vmult_float_emulated.h 111 0x1b10 5
+me_vmult_float_emulated.h 111 0x1b10 6
+me_vmult_float_emulated.h 111 0x1b10 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1b1a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1b1e x
+vector.hpp 856 0x1b24 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1b28 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1b2c
+me_vmult_float_emulated.h 108 0x1b2c 1
+me_vmult_float_emulated.h 109 0x1b30
+me_vmult_float_emulated.h 110 0x1b30 1
+me_vmult_float_emulated.h 110 0x1b30 2
+me_vmult_float_emulated.h 111 0x1b30 3
+me_vmult_float_emulated.h 111 0x1b30 4
+me_vmult_float_emulated.h 111 0x1b30 5
+me_vmult_float_emulated.h 108 0x1b34 x
+me_vmult_float_emulated.h 108 0x1b34 1 x
+me_vmult_float_emulated.h 111 0x1b34 2
+me_vmult_float_emulated.h 111 0x1b34 3
+me_vmult_float_emulated.h 111 0x1b34 4
+me_vmult_float_emulated.h 109 0x1b3e x
+me_vmult_float_emulated.h 124 0x1b42 x
+me_vmult_float_emulated.h 109 0x1b4e x
+me_vmult_float_emulated.h 110 0x1b4e 1 x
+me_vmult_float_emulated.h 110 0x1b4e 2 x
+me_vmult_float_emulated.h 115 0x1b52 x
+me_vmult_float_emulated.h 111 0x1b56 x
+me_vmult_float_emulated.h 115 0x1b62 x
+me_vmult_float_emulated.h 115 0x1b62 1 x
+me_vmult_float_emulated.h 115 0x1b62 2 x
+me_vmult_float_emulated.h 111 0x1b66 x
+me_vmult_float_emulated.h 111 0x1b66 1 x
+me_vmult_float_emulated.h 111 0x1b66 2 x
+me_vmult_float_emulated.h 117 0x1b6c x
+me_vmult_float_emulated.h 118 0x1b70 x
+me_vmult_float_emulated.h 119 0x1b7a x
+me_vmult_float_emulated.h 117 0x1b7e x
+me_vmult_float_emulated.h 118 0x1b82 x
+me_vmult_float_emulated.h 118 0x1b86
+me_vmult_float_emulated.h 120 0x1b90 x
+me_vmult_float_emulated.h 118 0x1b94 x
+me_vmult_float_emulated.h 119 0x1b98 x
+me_vmult_float_emulated.h 119 0x1b9c
+me_vmult_float_emulated.h 121 0x1ba6 x
+me_vmult_float_emulated.h 119 0x1baa x
+me_vmult_float_emulated.h 120 0x1bae x
+me_vmult_float_emulated.h 120 0x1bb2
+me_vmult_float_emulated.h 120 0x1bbe
+me_vmult_float_emulated.h 121 0x1bc2 x
+me_vmult_float_emulated.h 121 0x1bc6
+me_vmult_float_emulated.h 122 0x1bce x
+me_vmult_float_emulated.h 121 0x1bd4 x
+me_vmult_float_emulated.h 122 0x1bd8 x
+me_vmult_float_emulated.h 122 0x1bdc
+me_vmult_float_emulated.h 123 0x1be4 x
+me_vmult_float_emulated.h 122 0x1bea x
+me_vmult_float_emulated.h 123 0x1bee x
+me_vmult_float_emulated.h 123 0x1bf2
+me_vmult_float_emulated.h 123 0x1bfe
+me_vmult_float_emulated.h 124 0x1bfe 1 x
+me_vmult_float_emulated.h 124 0x1c06
+me_vmult_float_emulated.h 125 0x1c06 1 x
+me_vmult_float_emulated.h 125 0x1c14
+me_vmult_float_emulated.h 124 0x1c18 x
+me_vmult_float_emulated.h 125 0x1c2a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1c30 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x1c30 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1c40
+blend.hpp 170 0x1c4a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1c80
+reduce_mean_c8_impl.h 184 0x1c84 x
+reduce_mean_c8_impl.h 184 0x1c88
+reduce_mean_c8_impl.h 184 0x1c9c
+reduce_mean_c8_impl.h 184 0x1ca6
+reduce_mean_c8_impl.h 184 0x1caa
+reduce_mean_c8_impl.h 184 0x1cba
+reduce_mean_c8_impl.h 184 0x1cbe
+reduce_mean_c8_impl.h 200 0x1cc4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1ce0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1cea
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1cea 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1cea 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1cf0
+blend.hpp 170 0x1d06
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d0c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d0c 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1d20
+me_vmult_float_emulated.h 108 0x1d20 1
+me_vmult_float_emulated.h 109 0x1d20 2
+me_vmult_float_emulated.h 110 0x1d20 3
+me_vmult_float_emulated.h 110 0x1d20 4
+me_vmult_float_emulated.h 111 0x1d20 5
+me_vmult_float_emulated.h 111 0x1d20 6
+me_vmult_float_emulated.h 111 0x1d20 7
+me_vmult_float_emulated.h 112 0x1d20 8
+me_vmult_float_emulated.h 112 0x1d20 9
+me_vmult_float_emulated.h 113 0x1d20 10
+me_vmult_float_emulated.h 114 0x1d20 11
+me_vmult_float_emulated.h 114 0x1d20 12
+me_vmult_float_emulated.h 115 0x1d20 13
+me_vmult_float_emulated.h 115 0x1d20 14
+me_vmult_float_emulated.h 115 0x1d20 15
+me_vmult_float_emulated.h 109 0x1d2a
+me_vmult_float_emulated.h 111 0x1d2a 1
+me_vmult_float_emulated.h 113 0x1d2a 2
+me_vmult_float_emulated.h 115 0x1d2a 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1d2a 4
+add_reduce.hpp 322 0x1d2a 5
+add_reduce.hpp 322 0x1d2a 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1d2a 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d34
+vector.hpp 57 0x1d34 1
+vector.hpp 1139 0x1d34 2
+vector.hpp 1280 0x1d34 3
+vector.hpp 1287 0x1d34 4
+vector.hpp 1288 0x1d34 5
+vector.hpp 1292 0x1d34 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d34 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 226 0x1d34 8
+reduce_mean_c8_impl.h 268 0x1d34 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d3e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d3e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d3e 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d44
+blend.hpp 170 0x1d48
+blend.hpp 170 0x1d5a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d60
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d60 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 472 0x1d70
+superkernels.cpp 472 0x1d70 1 x
+superkernels.cpp 477 0x1d76
+superkernels.cpp 477 0x1d80 x
+superkernels.cpp 474 0x1d8a x
+superkernels.cpp 569 0x1d8a 1
+superkernels.cpp 474 0x1d94
+superkernels.cpp 477 0x1da4 x
+superkernels.cpp 477 0x1da4 1 x
+superkernels.cpp 474 0x1db6
+superkernels.cpp 474 0x1dbc x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1dc0
+io_buffer_main.h 218 0x1dc0 1
+io_buffer_main.h 324 0x1dc0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1dc0 3
+tile.hpp 74 0x1dc0 4
+tile.hpp 74 0x1dcc x
+tile.hpp 86 0x1dcc 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 483 0x1dd6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1dd6 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 480 0x1ddc x
+superkernels.cpp 480 0x1de2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1dec
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 481 0x1e00
+superkernels.cpp 487 0x1e00 1
+superkernels.cpp 481 0x1e0a
+superkernels.cpp 481 0x1e0a 1 x
+superkernels.cpp 481 0x1e14
+superkernels.cpp 481 0x1e14 1
+superkernels.cpp 481 0x1e1e
+superkernels.cpp 482 0x1e1e 1
+superkernels.cpp 481 0x1e28
+superkernels.cpp 482 0x1e28 1 x
+superkernels.cpp 481 0x1e32 x
+superkernels.cpp 483 0x1e32 1
+superkernels.cpp 483 0x1e38
+superkernels.cpp 487 0x1e3c
+superkernels.cpp 483 0x1e42
+superkernels.cpp 481 0x1e48
+superkernels.cpp 491 0x1e4c
+superkernels.cpp 481 0x1e52
+superkernels.cpp 482 0x1e52 1 x
+superkernels.cpp 481 0x1e5a x
+superkernels.cpp 481 0x1e60
+superkernels.cpp 483 0x1e64 x
+superkernels.cpp 487 0x1e68 x
+superkernels.cpp 487 0x1e6c
+superkernels.cpp 487 0x1e70
+superkernels.cpp 487 0x1e74
+superkernels.cpp 487 0x1e78
+superkernels.cpp 487 0x1e7c
+superkernels.cpp 483 0x1e80 x
+superkernels.cpp 487 0x1e84 x
+superkernels.cpp 487 0x1e88
+superkernels.cpp 487 0x1e8c
+superkernels.cpp 491 0x1e90 x
+superkernels.cpp 491 0x1ea0
+superkernels.cpp 491 0x1ea4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1eaa
+io_buffer_main.h 218 0x1eaa 1
+io_buffer_main.h 324 0x1eaa 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 491 0x1eb8
+superkernels.cpp 491 0x1ed6
+superkernels.cpp 491 0x1ef0
+superkernels.cpp 491 0x1f00
+superkernels.cpp 491 0x1f10
+superkernels.cpp 491 0x1f16
+superkernels.cpp 491 0x1f1a
+superkernels.cpp 491 0x1f20
+superkernels.cpp 491 0x1f30
+superkernels.cpp 491 0x1f30 1
+superkernels.cpp 491 0x1f30 2
+superkernels.cpp 491 0x1f3a
+superkernels.cpp 492 0x1f3a 1
+superkernels.cpp 492 0x1f3a 2
+superkernels.cpp 498 0x1f44
+superkernels.cpp 498 0x1f44 1
+superkernels.cpp 499 0x1f4e
+superkernels.cpp 505 0x1f54
+superkernels.cpp 508 0x1f54 1
+superkernels.cpp 511 0x1f54 2
+superkernels.cpp 491 0x1f5c
+superkernels.cpp 491 0x1f60
+superkernels.cpp 491 0x1f64
+superkernels.cpp 491 0x1f6a
+superkernels.cpp 492 0x1f72 x
+superkernels.cpp 494 0x1f82 x
+superkernels.cpp 495 0x1f86 x
+superkernels.cpp 496 0x1f8a x
+superkernels.cpp 498 0x1f8e x
+superkernels.cpp 498 0x1f9e
+superkernels.cpp 499 0x1fa2 x
+superkernels.cpp 499 0x1fb2
+superkernels.cpp 500 0x1fb6 x
+superkernels.cpp 500 0x1fc2
+superkernels.cpp 500 0x1fd0
+superkernels.cpp 505 0x1fe0
+superkernels.cpp 508 0x1fe0 1
+superkernels.cpp 511 0x1fe0 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1fea
+io_buffer_main.h 218 0x1fea 1
+io_buffer_main.h 324 0x1fea 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 505 0x1ff0 x
+superkernels.cpp 505 0x1ff0 1
+superkernels.cpp 505 0x2002
+superkernels.cpp 505 0x2006
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x200c x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 505 0x2018
+superkernels.cpp 505 0x201e x
+superkernels.cpp 505 0x201e 1
+superkernels.cpp 505 0x2028
+superkernels.cpp 505 0x2030
+superkernels.cpp 505 0x2036
+superkernels.cpp 505 0x203c
+superkernels.cpp 505 0x2040
+superkernels.cpp 505 0x2040 1
+superkernels.cpp 505 0x2046
+superkernels.cpp 505 0x2050
+superkernels.cpp 505 0x2050 1
+superkernels.cpp 505 0x2056
+superkernels.cpp 505 0x205a
+superkernels.cpp 505 0x205a 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x206a
+io_buffer_main.h 395 0x206a 1
+io_buffer_main.h 218 0x2070 x
+io_buffer_main.h 218 0x2074
+io_buffer_main.h 218 0x2078
+io_buffer_main.h 235 0x207e x
+io_buffer_main.h 218 0x208a x
+io_buffer_main.h 218 0x208a 1 x
+io_buffer_main.h 218 0x208e
+io_buffer_main.h 395 0x209a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 508 0x20a6 x
+superkernels.cpp 508 0x20b0
+superkernels.cpp 522 0x20b0 1
+superkernels.cpp 558 0x20b0 2
+superkernels.cpp 508 0x20be
+superkernels.cpp 508 0x20c2
+superkernels.cpp 508 0x20d2
+superkernels.cpp 508 0x20d8
+superkernels.cpp 508 0x20d8 1
+superkernels.cpp 508 0x20e2
+superkernels.cpp 508 0x20ea
+superkernels.cpp 508 0x20f0
+superkernels.cpp 508 0x20f6
+superkernels.cpp 508 0x20fa
+superkernels.cpp 508 0x20fa 1
+superkernels.cpp 508 0x2100
+superkernels.cpp 508 0x2110
+superkernels.cpp 508 0x2110 1
+superkernels.cpp 508 0x2116
+superkernels.cpp 508 0x211a
+superkernels.cpp 508 0x211a 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x212a
+io_buffer_main.h 395 0x212a 1
+io_buffer_main.h 218 0x2130 x
+io_buffer_main.h 218 0x2134
+io_buffer_main.h 218 0x2138
+io_buffer_main.h 235 0x213e x
+io_buffer_main.h 218 0x214a x
+io_buffer_main.h 218 0x214a 1 x
+io_buffer_main.h 218 0x214e
+io_buffer_main.h 395 0x215a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 511 0x2166 x
+superkernels.cpp 511 0x2166 1
+superkernels.cpp 511 0x217a
+superkernels.cpp 511 0x217e
+superkernels.cpp 511 0x2182
+superkernels.cpp 511 0x2188
+superkernels.cpp 511 0x2194
+superkernels.cpp 511 0x2198
+superkernels.cpp 511 0x2198 1
+superkernels.cpp 511 0x219e
+superkernels.cpp 511 0x21a6
+superkernels.cpp 511 0x21b0
+superkernels.cpp 511 0x21b4
+superkernels.cpp 511 0x21b4 1
+superkernels.cpp 511 0x21ba
+superkernels.cpp 511 0x21c0
+superkernels.cpp 511 0x21c0 1
+superkernels.cpp 511 0x21c6
+superkernels.cpp 511 0x21ca
+superkernels.cpp 511 0x21ca 1
+superkernels.cpp 516 0x21da
+superkernels.cpp 522 0x21da 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x21da 2 x
+io_buffer_main.h 395 0x21da 3
+io_buffer_main.h 218 0x21e4
+io_buffer_main.h 218 0x21e8
+io_buffer_main.h 235 0x21ee x
+io_buffer_main.h 218 0x21fa x
+io_buffer_main.h 218 0x21fa 1 x
+io_buffer_main.h 218 0x21fe
+io_buffer_main.h 395 0x220e x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x2226
+superkernels.cpp 522 0x2226 1
+superkernels.cpp 516 0x2240
+superkernels.cpp 522 0x2240 1
+superkernels.cpp 516 0x2250
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2250 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x225a x
+superkernels.cpp 522 0x225a 1
+superkernels.cpp 514 0x2264
+superkernels.cpp 522 0x2264 1 x
+superkernels.cpp 514 0x226e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2278 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x227c x
+superkernels.cpp 522 0x2280 x
+superkernels.cpp 522 0x2284
+superkernels.cpp 514 0x228a x
+superkernels.cpp 514 0x228e
+superkernels.cpp 516 0x2294 x
+superkernels.cpp 516 0x2298
+superkernels.cpp 522 0x2298 1
+superkernels.cpp 522 0x229e x
+superkernels.cpp 522 0x22a2
+superkernels.cpp 522 0x22b2
+superkernels.cpp 522 0x22b6
+superkernels.cpp 523 0x22bc
+superkernels.cpp 523 0x22ca x
+superkernels.cpp 523 0x22ca 1
+superkernels.cpp 523 0x22d4
+superkernels.cpp 524 0x22d4 1
+superkernels.cpp 524 0x22de
+superkernels.cpp 524 0x22de 1 x
+superkernels.cpp 523 0x22ee x
+superkernels.cpp 524 0x22f4 x
+superkernels.cpp 524 0x22f4 1 x
+superkernels.cpp 524 0x22fa
+superkernels.cpp 524 0x22fe
+superkernels.cpp 524 0x2302
+superkernels.cpp 524 0x2306
+superkernels.cpp 525 0x230a x
+superkernels.cpp 526 0x230e x
+superkernels.cpp 547 0x2312 x
+superkernels.cpp 525 0x2318
+superkernels.cpp 525 0x231e x
+superkernels.cpp 554 0x232e
+superkernels.cpp 558 0x232e 1
+superkernels.cpp 552 0x2338
+superkernels.cpp 554 0x2338 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2338 2
+io_buffer_main.h 327 0x2338 3
+io_buffer_main.h 425 0x2338 4
+io_buffer_main.h 425 0x2338 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x2342
+superkernels.cpp 555 0x2342 1
+superkernels.cpp 558 0x2342 2
+superkernels.cpp 559 0x2342 3
+superkernels.cpp 562 0x2342 4
+superkernels.cpp 563 0x2342 5
+superkernels.cpp 567 0x2342 6
+superkernels.cpp 554 0x2356
+superkernels.cpp 558 0x2356 1
+superkernels.cpp 552 0x2360
+superkernels.cpp 554 0x2360 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2360 2
+io_buffer_main.h 327 0x2360 3
+io_buffer_main.h 425 0x2360 4
+io_buffer_main.h 425 0x2360 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x236a
+superkernels.cpp 555 0x236a 1
+superkernels.cpp 558 0x236a 2
+superkernels.cpp 559 0x236a 3
+superkernels.cpp 562 0x236a 4
+superkernels.cpp 563 0x236a 5
+superkernels.cpp 567 0x236a 6
+superkernels.cpp 532 0x2380
+superkernels.cpp 533 0x2380 1
+superkernels.cpp 554 0x2380 2
+superkernels.cpp 555 0x2380 3
+superkernels.cpp 558 0x2380 4
+superkernels.cpp 559 0x2380 5
+superkernels.cpp 562 0x2380 6
+superkernels.cpp 563 0x2380 7
+superkernels.cpp 567 0x2380 8
+superkernels.cpp 532 0x238a x
+superkernels.cpp 532 0x238a 1
+superkernels.cpp 552 0x238a 2
+superkernels.cpp 532 0x2394
+superkernels.cpp 533 0x2394 1
+superkernels.cpp 533 0x239e x
+superkernels.cpp 554 0x239e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x239e 2
+io_buffer_main.h 327 0x239e 3
+io_buffer_main.h 425 0x239e 4
+io_buffer_main.h 425 0x239e 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 532 0x23ae x
+superkernels.cpp 533 0x23b4 x
+superkernels.cpp 533 0x23b4 1 x
+superkernels.cpp 533 0x23ba
+superkernels.cpp 533 0x23be
+superkernels.cpp 533 0x23c2
+superkernels.cpp 533 0x23c6
+superkernels.cpp 534 0x23ca x
+superkernels.cpp 535 0x23ce x
+superkernels.cpp 547 0x23d2 x
+superkernels.cpp 534 0x23d8
+superkernels.cpp 534 0x23de x
+superkernels.cpp 554 0x23e6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x23f0
+io_buffer_main.h 324 0x23f0 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 541 0x2410
+superkernels.cpp 541 0x2416 x
+superkernels.cpp 541 0x2416 1
+superkernels.cpp 541 0x2420
+superkernels.cpp 542 0x2420 1
+superkernels.cpp 542 0x242a x
+superkernels.cpp 541 0x2438 x
+superkernels.cpp 542 0x243e x
+superkernels.cpp 542 0x243e 1 x
+superkernels.cpp 542 0x2444
+superkernels.cpp 542 0x2448
+superkernels.cpp 542 0x244c
+superkernels.cpp 542 0x244c 1
+superkernels.cpp 542 0x2452
+superkernels.cpp 543 0x2456 x
+superkernels.cpp 544 0x245a x
+superkernels.cpp 547 0x245e x
+superkernels.cpp 543 0x2464
+superkernels.cpp 543 0x246a x
+superkernels.cpp 554 0x2480
+superkernels.cpp 558 0x2480 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2480 2
+io_buffer_main.h 125 0x2480 3 x
+io_buffer_main.h 324 0x2480 4
+io_buffer_main.h 327 0x2480 5
+io_buffer_main.h 327 0x2480 6
+io_buffer_main.h 425 0x2480 7
+io_buffer_main.h 425 0x2480 8
+io_buffer_main.h 125 0x248c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 287 0x2494 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x249a
+superkernels.cpp 554 0x249e
+superkernels.cpp 555 0x249e 1
+superkernels.cpp 558 0x249e 2
+superkernels.cpp 559 0x249e 3
+superkernels.cpp 562 0x249e 4
+superkernels.cpp 563 0x249e 5
+superkernels.cpp 567 0x249e 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 287 0x24a6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x24b0
+superkernels.cpp 552 0x24b0 1
+superkernels.cpp 554 0x24ba
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24c0 x
+io_buffer_main.h 324 0x24c0 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x24c4 x
+superkernels.cpp 554 0x24e0 x
+superkernels.cpp 554 0x24f0
+superkernels.cpp 554 0x24f4
+superkernels.cpp 554 0x2504
+superkernels.cpp 555 0x2504 1
+superkernels.cpp 554 0x250a
+superkernels.cpp 554 0x250a 1
+superkernels.cpp 554 0x2514
+superkernels.cpp 554 0x251e
+superkernels.cpp 554 0x2526
+superkernels.cpp 554 0x252a
+superkernels.cpp 554 0x252a 1
+superkernels.cpp 554 0x2530
+superkernels.cpp 554 0x2530 1
+superkernels.cpp 554 0x2536
+superkernels.cpp 554 0x2540
+superkernels.cpp 554 0x2540 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2540 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x254a
+superkernels.cpp 554 0x254e
+superkernels.cpp 554 0x254e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2554
+io_buffer_main.h 327 0x2554 1
+io_buffer_main.h 327 0x2554 2
+io_buffer_main.h 425 0x2554 3
+io_buffer_main.h 425 0x2554 4
+io_buffer_main.h 425 0x2554 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 555 0x2560 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2560 1 x
+io_buffer_main.h 425 0x2572 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x2576
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2576 1 x
+io_buffer_main.h 327 0x2590
+io_buffer_main.h 327 0x2594
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x25a0
+superkernels.cpp 558 0x25b0 x
+superkernels.cpp 558 0x25c0
+superkernels.cpp 558 0x25ce
+superkernels.cpp 558 0x25d2
+superkernels.cpp 558 0x25d8
+superkernels.cpp 559 0x25d8 1
+superkernels.cpp 558 0x25de
+superkernels.cpp 558 0x25ea
+superkernels.cpp 558 0x25ee
+superkernels.cpp 558 0x25f8
+superkernels.cpp 558 0x2600
+superkernels.cpp 558 0x2604
+superkernels.cpp 558 0x2604 1
+superkernels.cpp 558 0x260a
+superkernels.cpp 558 0x260a 1
+superkernels.cpp 558 0x2610
+superkernels.cpp 558 0x2620
+superkernels.cpp 558 0x2620 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2620 2
+io_buffer_main.h 324 0x2620 3
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x262a
+superkernels.cpp 558 0x262e
+superkernels.cpp 558 0x262e 1
+superkernels.cpp 562 0x2634
+superkernels.cpp 559 0x2642 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2642 1 x
+io_buffer_main.h 425 0x2654 x
+io_buffer_main.h 327 0x2658 x
+io_buffer_main.h 327 0x2668
+io_buffer_main.h 327 0x266c
+io_buffer_main.h 324 0x2676
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 562 0x2690
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2690 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 562 0x26a0 x
+superkernels.cpp 562 0x26a0 1
+superkernels.cpp 562 0x26b2
+superkernels.cpp 562 0x26b6
+superkernels.cpp 562 0x26bc
+superkernels.cpp 562 0x26ca
+superkernels.cpp 562 0x26ca 1
+superkernels.cpp 562 0x26d4
+superkernels.cpp 562 0x26de
+superkernels.cpp 562 0x26e6
+superkernels.cpp 562 0x26ea
+superkernels.cpp 562 0x26ea 1
+superkernels.cpp 562 0x26f0
+superkernels.cpp 562 0x26f0 1
+superkernels.cpp 562 0x26f6
+superkernels.cpp 562 0x2700
+superkernels.cpp 562 0x2700 1
+superkernels.cpp 562 0x2706
+superkernels.cpp 562 0x270a
+superkernels.cpp 562 0x270a 1
+superkernels.cpp 563 0x2710
+superkernels.cpp 563 0x271e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x271e 1 x
+io_buffer_main.h 425 0x2730 x
+io_buffer_main.h 327 0x2734 x
+io_buffer_main.h 327 0x2744
+io_buffer_main.h 327 0x2748
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 566 0x2750
+superkernels.cpp 567 0x2750 1
+superkernels.cpp 566 0x2756 x
+superkernels.cpp 566 0x2756 1
+superkernels.cpp 566 0x2760
+superkernels.cpp 566 0x2770
+superkernels.cpp 566 0x2774
+superkernels.cpp 567 0x278a x
+superkernels.cpp 569 0x2790
+superkernels.cpp 569 0x279e x
+superkernels.cpp 569 0x27a6
+superkernels.cpp 554 0x27c0
+superkernels.cpp 555 0x27c0 1
+superkernels.cpp 558 0x27c0 2
+superkernels.cpp 559 0x27c0 3
+superkernels.cpp 562 0x27c0 4
+superkernels.cpp 563 0x27c0 5
+superkernels.cpp 567 0x27c0 6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x27c0 7
+io_buffer_main.h 324 0x27c0 8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x27cc
+superkernels.cpp 558 0x27cc 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x27cc 2
+io_buffer_main.h 327 0x27cc 3
+io_buffer_main.h 425 0x27cc 4
+io_buffer_main.h 425 0x27cc 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x27d2
+superkernels.cpp 554 0x27d8
+superkernels.cpp - 0x27d9
+
+
+superkernels.cpp:
+File name Line number Starting address View Stmt
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 29 0x930 x
+0_0_reloadable2.cc 31 0x930 1 x
+0_0_reloadable2.cc 29 0x936
+0_0_reloadable2.cc 31 0x93c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x93c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 17 0x944
+0_0_reloadable2.cc 31 0x944 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x956 x
+io_buffer_compiler.h 590 0x95a
+io_buffer_compiler.h 590 0x95e
+io_buffer_compiler.h 590 0x962
+io_buffer_compiler.h 590 0x966
+io_buffer_compiler.h 195 0x976 x
+io_buffer_compiler.h 195 0x976 1 x
+io_buffer_compiler.h 194 0x97a x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x97e
+io_buffer_main.h 410 0x988 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 17 0x992 x
+0_0_reloadable2.cc 18 0x996 x
+0_0_reloadable2.cc 19 0x99a x
+0_0_reloadable2.cc 16 0x99e x
+0_0_reloadable2.cc 38 0x9b0 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0x9b4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 605 0x9c2 x
+io_buffer_compiler.h 605 0x9c6
+io_buffer_compiler.h 606 0x9ca
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0x9ca 1
+io_buffer_main.h 440 0x9d8 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 41 0x9dc
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x9dc 1
+io_buffer_compiler.h 606 0x9e2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 41 0x9f0 x
+0_0_reloadable2.cc 41 0x9f8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x9fc x
+io_buffer_compiler.h 606 0xa00
+io_buffer_compiler.h 606 0xa04
+io_buffer_compiler.h - 0xa05
+
+
+CU: me_div.c:
+File name Line number Starting address View Stmt
+
+./me_div.c:[++]
+me_div.c 108 0x27f0
+me_div.c 108 0x27f0 1
+me_div.c 115 0x27f0 2 x
+me_div.c 108 0x27f6
+me_div.c 108 0x27fa
+me_div.c 108 0x27fe
+me_div.c 108 0x2802
+me_div.c 108 0x2806
+me_div.c 108 0x280a
+me_div.c 108 0x280e
+me_div.c 108 0x2812
+me_div.c 108 0x2816
+me_div.c 108 0x281a
+me_div.c 108 0x281e
+me_div.c 108 0x2822
+me_div.c 108 0x2826
+me_div.c 108 0x282a
+me_div.c 108 0x282e
+me_div.c 108 0x2832
+me_div.c 108 0x2836
+me_div.c 108 0x283a
+me_div.c 108 0x283e
+me_div.c 108 0x2842
+me_div.c 108 0x2846
+me_div.c 108 0x284a
+me_div.c 108 0x284e
+me_div.c 108 0x2852
+me_div.c 108 0x2856
+me_div.c 108 0x285a
+me_div.c 108 0x285e
+me_div.c 108 0x2862
+me_div.c 119 0x2866 x
+me_div.c 108 0x286a x
+me_div.c 108 0x286e
+me_div.c 108 0x2872
+me_div.c 108 0x2876
+me_div.c - 0x2877
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
+CU: softfloat-specialize:
+File name Line number Starting address View Stmt
+
+./softfloat-specialize:[++]
+softfloat-specialize 78 0x2880
+softfloat-specialize 137 0x2880 1
+softfloat-specialize 139 0x2880 2
+softfloat-specialize 143 0x2880 3 x
+softfloat-specialize 137 0x288a
+softfloat-specialize 139 0x288a 1
+softfloat-specialize 140 0x288a 2
+softfloat-specialize 141 0x288a 3
+softfloat-specialize 78 0x2894
+softfloat-specialize 137 0x2894 1
+softfloat-specialize 139 0x2894 2
+softfloat-specialize 140 0x2894 3 x
+softfloat-specialize 141 0x289e x
+softfloat-specialize 137 0x28a2 x
+softfloat-specialize 139 0x28a6 x
+softfloat-specialize 139 0x28aa
+softfloat-specialize 137 0x28ae x
+softfloat-specialize 137 0x28b2
+softfloat-specialize 78 0x28b6 x
+softfloat-specialize 78 0x28ba
+softfloat-specialize 143 0x28be x
+softfloat-specialize 137 0x28c2
+softfloat-specialize 139 0x28c2 1
+softfloat-specialize 139 0x28c8 x
+softfloat-specialize 139 0x28cc
+softfloat-specialize 137 0x28d0 x
+softfloat-specialize 137 0x28d4
+softfloat-specialize 143 0x28d8 x
+softfloat-specialize 137 0x28dc x
+softfloat-specialize 139 0x28e0 x
+softfloat-specialize 143 0x28e4 x
+softfloat-specialize 139 0x28e8 x
+softfloat-specialize 143 0x28ec x
+
+./softfloat.c:[++]
+softfloat.c 154 0x28f0 x
+softfloat.c 161 0x28f0 1
+softfloat.c 203 0x28f0 2
+softfloat.c 161 0x28fa x
+softfloat.c 171 0x28fa 1
+softfloat.c 174 0x28fa 2
+softfloat.c 178 0x28fa 3
+softfloat.c 194 0x28fa 4
+softfloat.c 162 0x290c x
+softfloat.c 164 0x290c 1 x
+softfloat.c 182 0x2912
+softfloat.c 185 0x2912 1
+softfloat.c 202 0x2912 2
+softfloat.c 165 0x291e
+softfloat.c 171 0x291e 1
+softfloat.c 171 0x291e 2
+softfloat.c 174 0x291e 3
+softfloat.c 174 0x291e 4
+softfloat.c 165 0x2928
+softfloat.c 171 0x2928 1 x
+softfloat.c 171 0x292e
+softfloat.c 174 0x2932 x
+softfloat.c 170 0x2936
+softfloat.c 174 0x2936 1
+softfloat.c 170 0x293c x
+softfloat.c 170 0x293c 1 x
+softfloat.c 165 0x2940 x
+softfloat.c 165 0x2944
+softfloat.c 179 0x2950
+softfloat.c 179 0x2950 1 x
+softfloat.c 180 0x2950 2
+softfloat.c 181 0x2950 3
+softfloat.c 179 0x2956
+softfloat.c 179 0x295a
+softfloat.c 178 0x2960 x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2964
+
+./softfloat.c:[++]
+softfloat.c 128 0x2964 1
+softfloat.c 128 0x2968 x
+softfloat.c 181 0x2970 x
+softfloat.c 182 0x2970 1 x
+softfloat.c 182 0x2970 2
+softfloat.c 182 0x297a
+softfloat.c 180 0x297e x
+softfloat.c 182 0x2982 x
+softfloat.c 181 0x2986 x
+softfloat.c 180 0x298a x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2990
+
+./softfloat.c:[++]
+softfloat.c 187 0x2990 1
+softfloat.c 192 0x2990 2
+softfloat.c 204 0x2990 3
+softfloat.c 204 0x2990 4
+softfloat.c 187 0x299c x
+softfloat.c 187 0x29a0
+softfloat.c 192 0x29b0 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x29b4 x
+softfloat-macros 46 0x29b4 1 x
+softfloat-macros 49 0x29c4
+softfloat-macros 50 0x29c4 1 x
+softfloat-macros 50 0x29ca
+softfloat-macros 50 0x29ce
+softfloat-macros 50 0x29d2
+softfloat-macros 49 0x29d6 x
+softfloat-macros 50 0x29da x
+softfloat-macros 53 0x29de x
+softfloat-macros 50 0x29e2 x
+softfloat-macros 49 0x29e6 x
+
+./softfloat.c:[++]
+softfloat.c 194 0x29f6 x
+softfloat.c 204 0x29fa
+softfloat.c 204 0x29fa 1
+softfloat.c 204 0x2a10
+softfloat.c 204 0x2a10 1
+softfloat.c 202 0x2a20 x
+softfloat.c 202 0x2a20 1
+softfloat.c 203 0x2a20 2 x
+softfloat.c 128 0x2a2a
+softfloat.c 203 0x2a2a 1
+softfloat.c 203 0x2a2a 2
+softfloat.c 203 0x2a34
+softfloat.c 202 0x2a38
+softfloat.c 203 0x2a3c
+softfloat.c 205 0x2a40 x
+softfloat.c 203 0x2a44 x
+softfloat.c 204 0x2a48 x
+softfloat.c 204 0x2a48 1 x
+softfloat.c 128 0x2a4c x
+softfloat.c 128 0x2a50
+softfloat.c 128 0x2a54
+softfloat.c 185 0x2a60 x
+softfloat.c 128 0x2a64
+softfloat.c 128 0x2a6a x
+softfloat.c 185 0x2a6e x
+softfloat.c 185 0x2a72
+softfloat.c 218 0x2a80 x
+softfloat.c 224 0x2a80 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 552 0x2a86 x
+
+./softfloat.c:[++]
+softfloat.c 223 0x2a8a x
+softfloat.c 224 0x2a8e x
+softfloat.c 224 0x2a92
+softfloat.c 477 0x2aa0 x
+softfloat.c 481 0x2aa0 1
+softfloat.c 481 0x2aa0 2 x
+softfloat.c 482 0x2ab0
+softfloat.c 482 0x2ab6 x
+softfloat.c 482 0x2aba
+softfloat.c 484 0x2aca
+softfloat.c 484 0x2aca 1 x
+softfloat.c 484 0x2ad4
+softfloat.c 484 0x2ad4 1
+softfloat.c 483 0x2ad8
+softfloat.c 483 0x2adc x
+softfloat.c 481 0x2af0 x
+softfloat.c 482 0x2b00 x
+softfloat.c 70 0x2b20
+softfloat.c 81 0x2b20 1
+softfloat.c 734 0x2b20 2 x
+softfloat.c 81 0x2b2a x
+softfloat.c 81 0x2b2e
+softfloat.c 81 0x2b32
+softfloat.c 81 0x2b36
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2b3a
+
+./softfloat.c:[++]
+softfloat.c 744 0x2b3a 1 x
+softfloat.c 747 0x2b3a 2
+softfloat.c 761 0x2b3a 3
+softfloat.c 772 0x2b3a 4
+softfloat.c 788 0x2b3a 5
+softfloat.c 747 0x2b40 x
+softfloat.c 747 0x2b44
+softfloat.c 70 0x2b4a x
+softfloat.c 70 0x2b4e
+softfloat.c 745 0x2b4e 1
+softfloat.c 746 0x2b4e 2
+softfloat.c 745 0x2b54 x
+softfloat.c 746 0x2b58 x
+softfloat.c 748 0x2b58 1
+softfloat.c 762 0x2b58 2
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2b5e
+
+./softfloat.c:[++]
+softfloat.c 128 0x2b5e 1
+softfloat.c 748 0x2b5e 2 x
+softfloat.c 761 0x2b64 x
+softfloat.c 761 0x2b68
+softfloat.c 128 0x2b6e x
+softfloat.c 762 0x2b7a x
+softfloat.c 762 0x2b7e
+softfloat.c 793 0x2b8e
+softfloat.c 787 0x2b92
+softfloat.c 767 0x2b96 x
+softfloat.c 766 0x2b9a x
+softfloat.c 772 0x2b9e x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2ba2 x
+softfloat-macros 46 0x2ba2 1 x
+
+./softfloat.c:[++]
+softfloat.c 770 0x2ba8
+softfloat.c 785 0x2ba8 1
+softfloat.c 770 0x2bae x
+softfloat.c 766 0x2bb2 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2bba
+softfloat-macros 50 0x2bba 1 x
+softfloat-macros 50 0x2bc0
+softfloat-macros 50 0x2bc4
+softfloat-macros 49 0x2bc8 x
+softfloat-macros 50 0x2bd2 x
+softfloat-macros 50 0x2bd6
+softfloat-macros 53 0x2bda x
+softfloat-macros 50 0x2bde x
+softfloat-macros 49 0x2be2 x
+
+./softfloat.c:[++]
+softfloat.c 748 0x2bf0 x
+softfloat.c 756 0x2bf6
+softfloat.c 785 0x2bf6 1
+softfloat.c 793 0x2c04
+softfloat.c 753 0x2c08 x
+softfloat.c 787 0x2c08 1
+softfloat.c 752 0x2c0e
+softfloat.c 752 0x2c0e 1
+softfloat.c 752 0x2c12 x
+softfloat.c 752 0x2c12 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2c16 x
+softfloat-macros 46 0x2c16 1 x
+
+./softfloat.c:[++]
+softfloat.c 756 0x2c1c x
+softfloat.c 752 0x2c20 x
+softfloat.c 752 0x2c20 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2c2a
+softfloat-macros 50 0x2c2a 1 x
+softfloat-macros 50 0x2c30
+softfloat-macros 50 0x2c34
+softfloat-macros 50 0x2c38
+softfloat-macros 49 0x2c3c x
+softfloat-macros 50 0x2c40 x
+softfloat-macros 53 0x2c44 x
+softfloat-macros 50 0x2c48 x
+softfloat-macros 49 0x2c4c x
+
+./softfloat.c:[++]
+softfloat.c 785 0x2c50 x
+softfloat.c 786 0x2c50 1
+softfloat.c 787 0x2c50 2 x
+softfloat.c 786 0x2c5a x
+softfloat.c 790 0x2c5a 1 x
+softfloat.c 786 0x2c60
+softfloat.c 788 0x2c64 x
+softfloat.c 788 0x2c68
+softfloat.c 788 0x2c6c
+softfloat.c 793 0x2c70 x
+softfloat.c 763 0x2c80 x
+softfloat.c 764 0x2c90 x
+softfloat.c 128 0x2c94
+softfloat.c 128 0x2c9a x
+softfloat.c 776 0x2cb0 x
+softfloat.c 780 0x2cc0 x
+softfloat.c 793 0x2cd0
+softfloat.c 781 0x2cda
+softfloat.c 781 0x2ce0 x
+softfloat.c 793 0x2ce0 1
+softfloat.c 781 0x2ce6
+softfloat.c 749 0x2cf0 x
+softfloat.c 750 0x2d00 x
+softfloat.c 763 0x2d10 x
+softfloat.c 777 0x2d20 x
+softfloat.c 777 0x2d24
+softfloat.c 778 0x2d34 x
+softfloat.c 780 0x2d50 x
+softfloat.c 780 0x2d50 1 x
+softfloat.c 780 0x2d56
+softfloat.c 780 0x2d5a
+softfloat.c 128 0x2d5e x
+softfloat.c 749 0x2d70 x
+softfloat.c 777 0x2d80 x
+softfloat.c 70 0x2d90
+softfloat.c 81 0x2d90 1
+softfloat.c 805 0x2d90 2 x
+softfloat.c 81 0x2d9a x
+softfloat.c 81 0x2d9e
+softfloat.c 70 0x2da2 x
+softfloat.c 81 0x2da6 x
+softfloat.c 81 0x2daa
+softfloat.c 70 0x2dae x
+softfloat.c 816 0x2dae 1
+softfloat.c 817 0x2dae 2
+softfloat.c 816 0x2db4 x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2db8
+
+./softfloat.c:[++]
+softfloat.c 815 0x2db8 1 x
+softfloat.c 818 0x2db8 2
+softfloat.c 819 0x2db8 3
+softfloat.c 843 0x2db8 4
+softfloat.c 818 0x2dbe x
+softfloat.c 818 0x2dc2
+softfloat.c 817 0x2dc8 x
+softfloat.c 833 0x2dcc
+softfloat.c 851 0x2dcc 1
+softfloat.c 859 0x2dcc 2
+softfloat.c 862 0x2dcc 3
+softfloat.c 851 0x2dd6 x
+softfloat.c 862 0x2dda x
+softfloat.c 859 0x2dde x
+softfloat.c 819 0x2de2 x
+softfloat.c 819 0x2de6
+softfloat.c 825 0x2dec
+softfloat.c 835 0x2dec 1
+softfloat.c 835 0x2df0 x
+softfloat.c 833 0x2dfa x
+softfloat.c 833 0x2dfe
+softfloat.c 868 0x2e0e
+softfloat.c 838 0x2e12 x
+softfloat.c 837 0x2e16 x
+softfloat.c 843 0x2e1a x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2e1e x
+softfloat-macros 46 0x2e1e 1 x
+
+./softfloat.c:[++]
+softfloat.c 837 0x2e24 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2e30
+softfloat-macros 50 0x2e30 1 x
+softfloat-macros 50 0x2e30 2
+softfloat-macros 50 0x2e3a
+softfloat-macros 50 0x2e3e
+softfloat-macros 53 0x2e42 x
+softfloat-macros 49 0x2e46 x
+softfloat-macros 50 0x2e4a x
+softfloat-macros 50 0x2e4e
+softfloat-macros 50 0x2e52
+softfloat-macros 49 0x2e56 x
+
+./softfloat.c:[++]
+softfloat.c 846 0x2e66 x
+softfloat.c 851 0x2e80 x
+softfloat.c 867 0x2e90
+softfloat.c 868 0x2e94
+softfloat.c 855 0x2e98
+softfloat.c 855 0x2e98 1
+softfloat.c 867 0x2e9c
+softfloat.c 856 0x2ea0 x
+softfloat.c 855 0x2ea4 x
+softfloat.c 855 0x2ea4 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2ea8 x
+softfloat-macros 46 0x2ea8 1 x
+
+./softfloat.c:[++]
+softfloat.c 855 0x2eae x
+softfloat.c 855 0x2eae 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2eba
+softfloat-macros 50 0x2eba 1 x
+softfloat-macros 50 0x2eba 2
+softfloat-macros 50 0x2ec4
+softfloat-macros 50 0x2ec8
+softfloat-macros 50 0x2ecc
+softfloat-macros 49 0x2ed0 x
+softfloat-macros 50 0x2ed4 x
+softfloat-macros 53 0x2ed8 x
+softfloat-macros 50 0x2edc x
+softfloat-macros 49 0x2ee0 x
+
+./softfloat.c:[++]
+softfloat.c 864 0x2ef0 x
+softfloat.c 868 0x2f00 x
+softfloat.c 867 0x2f06 x
+softfloat.c 820 0x2f20 x
+softfloat.c 829 0x2f30 x
+softfloat.c 829 0x2f34
+softfloat.c 825 0x2f3a x
+softfloat.c 825 0x2f3e
+softfloat.c 825 0x2f42
+softfloat.c 830 0x2f4a x
+softfloat.c 830 0x2f4e
+softfloat.c 128 0x2f5e
+softfloat.c 831 0x2f62
+softfloat.c 831 0x2f68 x
+softfloat.c 831 0x2f70
+softfloat.c 831 0x2f74
+softfloat.c 831 0x2f7c
+softfloat.c 128 0x2f80 x
+softfloat.c 834 0x2f90 x
+softfloat.c 128 0x2fa0
+softfloat.c 835 0x2fa4 x
+softfloat.c 128 0x2fa8 x
+softfloat.c 128 0x2fac
+softfloat.c 128 0x2fb2
+softfloat.c 852 0x2fc0 x
+softfloat.c 853 0x2fd0 x
+softfloat.c 821 0x2fe0 x
+softfloat.c 821 0x2fe4
+softfloat.c 823 0x2ff4 x
+softfloat.c 868 0x3016
+softfloat.c 864 0x301a
+softfloat.c 846 0x3036
+softfloat.c 867 0x303a
+softfloat.c 868 0x303e
+softfloat.c 834 0x3050 x
+softfloat.c 852 0x3060 x
+softfloat.c 821 0x3070 x
+softfloat.c 92 0x3080
+softfloat.c 878 0x3080 1 x
+softfloat.c 92 0x3084 x
+softfloat.c 92 0x3088
+softfloat.c 884 0x308c x
+softfloat.c 884 0x3090
+softfloat.c 888 0x30a0 x
+softfloat.c 885 0x30b0 x
+softfloat.c - 0x30b1
+
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.bcf
@@ -0,0 +1,16 @@
+_reserved DMb 0x0 0x40000
+
+_reserved PM 0x0 0x930 //reserved for main elf
+
+_entry_point _Z13kernelWrapperPPvjjjj
+_symbol _Z13kernelWrapperPPvjjjj 0x930
+
+_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers
+_reserved DMb 0x7ba80 0x40 //reserved for sync buffer
+_stack DM_stack 0x7bac0 0x940 //stack for core
+_reserved DMb 0x7c400 0x40 //reserved for main elf heap
+//space for synopsys compiler at 0x7c440 0x880//heap
+_reserved DMb 0x40000 0x3b280
+
+_reserved DMb 0x7ccc0 0x3340
+_reserved DMb 0x80000 0x80000 // And everything else the core can't see
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.prx
new file mode 100644
index 0000000000000000000000000000000000000000..6d404d9379db68f0a4e0136123d66bec3f4591c3
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/scripts/3_3_reloadable12.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/src/3_3_reloadable12.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/src/3_3_reloadable12.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable12/src/3_3_reloadable12.cc
@@ -0,0 +1,41 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+
+// Declare Kernel objects and external arrays
+
+
+void _b961_wrapper(void* args[])
+{
+ superkernel_reduce_mean_c8(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+using UniformKernelFunc = void (*)(void **);
+
+static UniformKernelFunc g_uniformKernelFuncs[1] = {
+ _b961_wrapper
+};
+
+__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut)
+{
+ uint32 idx = 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+
+ (*(g_uniformKernelFuncs[kernelId]))(args);
+
+ idx = 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+}
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.calltree
new file mode 100644
index 0000000000000000000000000000000000000000..78faacd04df9723eae105409bd66ec99011b5021
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.calltree
@@ -0,0 +1,88 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:01 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme
+
+
+// Release: ipp V-2024.06-TGT-241219
+
+_Z13kernelWrapperPPvjjjj
+ _Z13_b896_wrapperPPv (referenced text)
+ _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ _Z13_b901_wrapperPPv (referenced text)
+ _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _Z13_b906_wrapperPPv (referenced text)
+ _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ _Z13_b881_wrapperPPv (referenced text)
+ _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ _Z13_b891_wrapperPPv (referenced text)
+ _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ _Z13_b919_wrapperPPv (referenced text)
+ _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Call tree stack and functions sizes:
+
+stack stack stack call func func function name
+ desc level level desc
+----- ----- ----- ----- ----- ----- --------------------------------------------------------------
+ 64 320 0 0 390 11754 _Z13kernelWrapperPPvjjjj
+ 0 192 1 1 36 4714 _Z13_b896_wrapperPPv
+ 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ 0 192 1 1 32 1252 _Z13_b901_wrapperPPv
+ 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ 0 64 1 1 32 862 _Z13_b906_wrapperPPv
+ 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ 0 256 1 1 32 1394 _Z13_b881_wrapperPPv
+ 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ 0 128 1 1 36 1092 _Z13_b891_wrapperPPv
+ 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ 0 192 1 1 36 2050 _Z13_b919_wrapperPPv
+ 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Maximum call level : 5
+Maximum stack level: 4
+Maximum stack size : 320
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmic2
new file mode 100644
index 0000000000000000000000000000000000000000..f6eec0e2b8bd493ef0112849914646d03f76489e
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmic2
@@ -0,0 +1,17226 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:03 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable3.cc" 82 first
+.src_ref 0 "0_0_reloadable3.cc" 84 60 first
+.src_ref 0 "0_0_reloadable3.cc" 84 110
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 82
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 84 110
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 84 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 93 60
+.src_ref 0 "0_0_reloadable3.cc" 95 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 93 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 95 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 95 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "10000000" // /* MW 5 */
+ 6942 "10101011" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "10000000" // /* MW 5 */
+ 7224 "11001011" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "00000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11000000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11000000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11008 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000001" // /* MW 5 */
+ 11010 "00100001" // /* MW 4 */
+ 11011 "00000000" // /* MW 3 */
+ 11012 "00000000" // /* MW 2 */
+ 11013 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11014 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11015 "11000000" // /* MW 3 */
+ 11016 "01010000" // /* MW 2 */
+ 11017 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11018 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "10010000" // /* MW 3 */
+ 11020 "01100000" // /* MW 2 */
+ 11021 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11022 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11023 "00010001" // /* MW 3 */
+ 11024 "00000100" // /* MW 2 */
+ 11025 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11026 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11027 "00010001" // /* MW 3 */
+ 11028 "00010100" // /* MW 2 */
+ 11029 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11031 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11040 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11041 "00101110" // /* MW 3 */
+ 11042 "00011100" // /* MW 2 */
+ 11043 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11044 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11045 "00000001" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00001000" // /* MW 2 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00111101" // /* MW 3 */
+ 11052 "11111100" // /* MW 2 */
+ 11053 "00001111" // /* MW 1 */
+ 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11055 "00000000" // /* MW 1 */
+ 11056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11057 "00000000" // /* MW 1 */
+ 11058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11059 "00000000" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11062 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "00101001" // /* MW 3 */
+ 11064 "00011100" // /* MW 2 */
+ 11065 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11066 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11067 "00101110" // /* MW 3 */
+ 11068 "00011100" // /* MW 2 */
+ 11069 "00000001" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+ 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11073 "00000000" // /* MW 1 */
+ 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11075 "00000000" // /* MW 1 */
+ 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11077 "00000000" // /* MW 1 */
+ 11078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11079 "00000000" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11082 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00101001" // /* MW 3 */
+ 11084 "00011100" // /* MW 2 */
+ 11085 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11086 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11087 "00101110" // /* MW 3 */
+ 11088 "00000100" // /* MW 2 */
+ 11089 "00000001" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+ 11092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11093 "00000000" // /* MW 1 */
+ 11094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11095 "00000000" // /* MW 1 */
+ 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11097 "00000000" // /* MW 1 */
+ 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11099 "00000000" // /* MW 1 */
+ 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11101 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11102 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11103 "00101001" // /* MW 3 */
+ 11104 "00011100" // /* MW 2 */
+ 11105 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11106 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11107 "00101110" // /* MW 3 */
+ 11108 "00010100" // /* MW 2 */
+ 11109 "00000001" // /* MW 1 */
+ 11110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11111 "00000000" // /* MW 1 */
+ 11112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11113 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11114 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11115 "00000001" // /* MW 5 */
+ 11116 "00000000" // /* MW 4 */
+ 11117 "10000000" // /* MW 3 */
+ 11118 "00010101" // /* MW 2 */
+ 11119 "00000000" // /* MW 1 */
+.delay_slot
+ 11120 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11121 "10011101" // /* MW 3 */
+ 11122 "11111011" // /* MW 2 */
+ 11123 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11128 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11129 "00101001" // /* MW 3 */
+ 11130 "11011100" // /* MW 2 */
+ 11131 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11132 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11133 "11000000" // /* MW 3 */
+ 11134 "01100000" // /* MW 2 */
+ 11135 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11136 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11137 "00111001" // /* MW 3 */
+ 11138 "11111100" // /* MW 2 */
+ 11139 "00000111" // /* MW 1 */
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11150 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10011001" // /* MW 3 */
+ 11152 "11111011" // /* MW 2 */
+ 11153 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11154 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11155 "00000000" // /* MW 3 */
+ 11156 "00101000" // /* MW 2 */
+ 11157 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11161 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11163 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11164 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11165 "00000001" // /* MW 3 */
+ 11166 "00100000" // /* MW 2 */
+ 11167 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11168 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "01110001" // /* MW 9 */
+ 11170 "00000000" // /* MW 8 */
+ 11171 "00000000" // /* MW 7 */
+ 11172 "00000000" // /* MW 6 */
+ 11173 "11111110" // /* MW 5 */
+ 11174 "00111111" // /* MW 4 */
+ 11175 "00110000" // /* MW 3 */
+ 11176 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11177 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11184 "10111010" // MOVA m0, #32; MOVXM ls, #11360 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11185 "00010000" // /* MW 9 */
+ 11186 "00110000" // /* MW 8 */
+ 11187 "01111110" // /* MW 7 */
+ 11188 "00001000" // /* MW 6 */
+ 11189 "00000000" // /* MW 5 */
+ 11190 "00000000" // /* MW 4 */
+ 11191 "10000000" // /* MW 3 */
+ 11192 "00000000" // /* MW 2 */
+ 11193 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11194 "10111010" // LDA r3, [p3], m0; MOVXM le, #11376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11195 "00010000" // /* MW 9 */
+ 11196 "00111000" // /* MW 8 */
+ 11197 "10111110" // /* MW 7 */
+ 11198 "00001001" // /* MW 6 */
+ 11199 "00000000" // /* MW 5 */
+ 11200 "00000000" // /* MW 4 */
+ 11201 "11010000" // /* MW 3 */
+ 11202 "00001110" // /* MW 2 */
+ 11203 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11204 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11205 "01011000" // /* MW 9 */
+ 11206 "00111100" // /* MW 8 */
+ 11207 "00001011" // /* MW 7 */
+ 11208 "01001000" // /* MW 6 */
+ 11209 "00010111" // /* MW 5 */
+ 11210 "00111110" // /* MW 4 */
+ 11211 "11010000" // /* MW 3 */
+ 11212 "10010000" // /* MW 2 */
+ 11213 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11214 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11215 "00010000" // /* MW 9 */
+ 11216 "00110100" // /* MW 8 */
+ 11217 "00110010" // /* MW 7 */
+ 11218 "11110010" // /* MW 6 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "11010000" // /* MW 3 */
+ 11222 "10000000" // /* MW 2 */
+ 11223 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11224 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "01000010" // /* MW 3 */
+ 11226 "00000100" // /* MW 2 */
+ 11227 "00000100" // /* MW 1 */
+ 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11229 "00000000" // /* MW 1 */
+ 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11231 "00000000" // /* MW 1 */
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11233 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11234 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11235 "00011101" // /* MW 3 */
+ 11236 "11000010" // /* MW 2 */
+ 11237 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11238 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11239 "11111001" // /* MW 5 */
+ 11240 "11100001" // /* MW 4 */
+ 11241 "10001010" // /* MW 3 */
+ 11242 "00001110" // /* MW 2 */
+ 11243 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11244 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11245 "01101000" // /* MW 5 */
+ 11246 "01010000" // /* MW 4 */
+ 11247 "01110000" // /* MW 3 */
+ 11248 "00010011" // /* MW 2 */
+ 11249 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11250 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11251 "10000000" // /* MW 7 */
+ 11252 "10111010" // /* MW 6 */
+ 11253 "11101000" // /* MW 5 */
+ 11254 "01010000" // /* MW 4 */
+ 11255 "01110000" // /* MW 3 */
+ 11256 "00011011" // /* MW 2 */
+ 11257 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11258 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11259 "01101000" // /* MW 5 */
+ 11260 "01010000" // /* MW 4 */
+ 11261 "01110000" // /* MW 3 */
+ 11262 "00010011" // /* MW 2 */
+ 11263 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11264 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11265 "11101000" // /* MW 5 */
+ 11266 "01010000" // /* MW 4 */
+ 11267 "01110000" // /* MW 3 */
+ 11268 "00011011" // /* MW 2 */
+ 11269 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11270 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "10011011" // /* MW 3 */
+ 11272 "00001000" // /* MW 2 */
+ 11273 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11274 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11275 "01101000" // /* MW 5 */
+ 11276 "01010000" // /* MW 4 */
+ 11277 "01110000" // /* MW 3 */
+ 11278 "00011011" // /* MW 2 */
+ 11279 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11280 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11281 "11101000" // /* MW 5 */
+ 11282 "01010000" // /* MW 4 */
+ 11283 "01110000" // /* MW 3 */
+ 11284 "00010011" // /* MW 2 */
+ 11285 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11286 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11287 "01000001" // /* MW 9 */
+ 11288 "11100010" // /* MW 8 */
+ 11289 "00000000" // /* MW 7 */
+ 11290 "00011101" // /* MW 6 */
+ 11291 "00110100" // /* MW 5 */
+ 11292 "00101000" // /* MW 4 */
+ 11293 "01110000" // /* MW 3 */
+ 11294 "00011011" // /* MW 2 */
+ 11295 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11296 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11297 "01100001" // /* MW 9 */
+ 11298 "11100000" // /* MW 8 */
+ 11299 "00000001" // /* MW 7 */
+ 11300 "00011101" // /* MW 6 */
+ 11301 "01110100" // /* MW 5 */
+ 11302 "00101000" // /* MW 4 */
+ 11303 "01110000" // /* MW 3 */
+ 11304 "00010011" // /* MW 2 */
+ 11305 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11306 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11307 "01000001" // /* MW 9 */
+ 11308 "11100010" // /* MW 8 */
+ 11309 "00000000" // /* MW 7 */
+ 11310 "00011101" // /* MW 6 */
+ 11311 "00110100" // /* MW 5 */
+ 11312 "00101000" // /* MW 4 */
+ 11313 "01110000" // /* MW 3 */
+ 11314 "00011011" // /* MW 2 */
+ 11315 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11316 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11317 "01100001" // /* MW 9 */
+ 11318 "11100000" // /* MW 8 */
+ 11319 "00000001" // /* MW 7 */
+ 11320 "00011101" // /* MW 6 */
+ 11321 "01110100" // /* MW 5 */
+ 11322 "00101000" // /* MW 4 */
+ 11323 "01110000" // /* MW 3 */
+ 11324 "00010011" // /* MW 2 */
+ 11325 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11326 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11327 "01000001" // /* MW 9 */
+ 11328 "11100010" // /* MW 8 */
+ 11329 "00000000" // /* MW 7 */
+ 11330 "00011101" // /* MW 6 */
+ 11331 "00110100" // /* MW 5 */
+ 11332 "00101000" // /* MW 4 */
+ 11333 "01110000" // /* MW 3 */
+ 11334 "00011011" // /* MW 2 */
+ 11335 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11336 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11337 "01100001" // /* MW 9 */
+ 11338 "11100000" // /* MW 8 */
+ 11339 "00000001" // /* MW 7 */
+ 11340 "00011101" // /* MW 6 */
+ 11341 "01110100" // /* MW 5 */
+ 11342 "00101000" // /* MW 4 */
+ 11343 "01110000" // /* MW 3 */
+ 11344 "00010011" // /* MW 2 */
+ 11345 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11346 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11347 "01000001" // /* MW 13 */
+ 11348 "11100010" // /* MW 12 */
+ 11349 "00000000" // /* MW 11 */
+ 11350 "10001100" // /* MW 10 */
+ 11351 "01110000" // /* MW 9 */
+ 11352 "00001000" // /* MW 8 */
+ 11353 "00000000" // /* MW 7 */
+ 11354 "00000000" // /* MW 6 */
+ 11355 "01101000" // /* MW 5 */
+ 11356 "01010000" // /* MW 4 */
+ 11357 "01110000" // /* MW 3 */
+ 11358 "00011011" // /* MW 2 */
+ 11359 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11360 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11361 "00000011" // /* MW 15 */
+ 11362 "00001111" // /* MW 14 */
+ 11363 "01111000" // /* MW 13 */
+ 11364 "10100101" // /* MW 12 */
+ 11365 "00000001" // /* MW 11 */
+ 11366 "00000000" // /* MW 10 */
+ 11367 "00000000" // /* MW 9 */
+ 11368 "00000000" // /* MW 8 */
+ 11369 "10100011" // /* MW 7 */
+ 11370 "00011100" // /* MW 6 */
+ 11371 "11101010" // /* MW 5 */
+ 11372 "01010000" // /* MW 4 */
+ 11373 "01110000" // /* MW 3 */
+ 11374 "00010011" // /* MW 2 */
+ 11375 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11376 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11377 "00010010" // /* MW 15 */
+ 11378 "00000111" // /* MW 14 */
+ 11379 "01111000" // /* MW 13 */
+ 11380 "10100101" // /* MW 12 */
+ 11381 "00000001" // /* MW 11 */
+ 11382 "00000000" // /* MW 10 */
+ 11383 "00000000" // /* MW 9 */
+ 11384 "00000000" // /* MW 8 */
+ 11385 "00100011" // /* MW 7 */
+ 11386 "00011100" // /* MW 6 */
+ 11387 "01101010" // /* MW 5 */
+ 11388 "01010000" // /* MW 4 */
+ 11389 "01110000" // /* MW 3 */
+ 11390 "00011011" // /* MW 2 */
+ 11391 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11392 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11393 "01100001" // /* MW 7 */
+ 11394 "11100000" // /* MW 6 */
+ 11395 "00000001" // /* MW 5 */
+ 11396 "00000010" // /* MW 4 */
+ 11397 "01100000" // /* MW 3 */
+ 11398 "10010100" // /* MW 2 */
+ 11399 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11400 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11401 "01000001" // /* MW 7 */
+ 11402 "11100010" // /* MW 6 */
+ 11403 "00000000" // /* MW 5 */
+ 11404 "00000010" // /* MW 4 */
+ 11405 "01100000" // /* MW 3 */
+ 11406 "10000100" // /* MW 2 */
+ 11407 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11408 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11409 "01100001" // /* MW 7 */
+ 11410 "11100000" // /* MW 6 */
+ 11411 "00000001" // /* MW 5 */
+ 11412 "00000010" // /* MW 4 */
+ 11413 "01100000" // /* MW 3 */
+ 11414 "10010100" // /* MW 2 */
+ 11415 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11417 "01000001" // /* MW 7 */
+ 11418 "11100010" // /* MW 6 */
+ 11419 "00000000" // /* MW 5 */
+ 11420 "00000010" // /* MW 4 */
+ 11421 "01100000" // /* MW 3 */
+ 11422 "10000100" // /* MW 2 */
+ 11423 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11425 "01100001" // /* MW 7 */
+ 11426 "11100000" // /* MW 6 */
+ 11427 "00000001" // /* MW 5 */
+ 11428 "00000010" // /* MW 4 */
+ 11429 "01100000" // /* MW 3 */
+ 11430 "10010100" // /* MW 2 */
+ 11431 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11432 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11433 "01000001" // /* MW 7 */
+ 11434 "11100010" // /* MW 6 */
+ 11435 "00000000" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "01100000" // /* MW 3 */
+ 11438 "10000100" // /* MW 2 */
+ 11439 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11440 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11441 "01100001" // /* MW 7 */
+ 11442 "11100000" // /* MW 6 */
+ 11443 "00000001" // /* MW 5 */
+ 11444 "00000010" // /* MW 4 */
+ 11445 "01100000" // /* MW 3 */
+ 11446 "10010100" // /* MW 2 */
+ 11447 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11448 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11449 "00100011" // /* MW 3 */
+ 11450 "00011100" // /* MW 2 */
+ 11451 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11452 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11453 "00000000" // /* MW 5 */
+ 11454 "01010000" // /* MW 4 */
+ 11455 "01100000" // /* MW 3 */
+ 11456 "10010100" // /* MW 2 */
+ 11457 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11458 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11459 "00100011" // /* MW 3 */
+ 11460 "00011100" // /* MW 2 */
+ 11461 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11462 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11463 "10100011" // /* MW 3 */
+ 11464 "00011100" // /* MW 2 */
+ 11465 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11466 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11467 "00100011" // /* MW 3 */
+ 11468 "00011100" // /* MW 2 */
+ 11469 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11471 "10100011" // /* MW 3 */
+ 11472 "00011100" // /* MW 2 */
+ 11473 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11475 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11488 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "10000000" // /* MW 5 */
+ 11490 "11001000" // /* MW 4 */
+ 11491 "11001000" // /* MW 3 */
+ 11492 "00000111" // /* MW 2 */
+ 11493 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11494 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11000001" // /* MW 5 */
+ 11496 "10110101" // /* MW 4 */
+ 11497 "11011000" // /* MW 3 */
+ 11498 "11000010" // /* MW 2 */
+ 11499 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "00000001" // /* MW 5 */
+ 11502 "00000000" // /* MW 4 */
+ 11503 "00000000" // /* MW 3 */
+ 11504 "00001000" // /* MW 2 */
+ 11505 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11506 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11507 "01111001" // /* MW 9 */
+ 11508 "01100000" // /* MW 8 */
+ 11509 "11001010" // /* MW 7 */
+ 11510 "10000001" // /* MW 6 */
+ 11511 "00010100" // /* MW 5 */
+ 11512 "00100011" // /* MW 4 */
+ 11513 "10110000" // /* MW 3 */
+ 11514 "00111010" // /* MW 2 */
+ 11515 "11111111" // /* MW 1 */
+ 11516 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11517 "01110000" // /* MW 7 */
+ 11518 "11010000" // /* MW 6 */
+ 11519 "00001011" // /* MW 5 */
+ 11520 "00000000" // /* MW 4 */
+ 11521 "10110000" // /* MW 3 */
+ 11522 "10000011" // /* MW 2 */
+ 11523 "11111101" // /* MW 1 */
+ 11524 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "00010101" // /* MW 3 */
+ 11526 "11111100" // /* MW 2 */
+ 11527 "00001111" // /* MW 1 */
+ 11528 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11529 "00111101" // /* MW 3 */
+ 11530 "11110000" // /* MW 2 */
+ 11531 "00001111" // /* MW 1 */
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11534 "10000100" // JNZ r16, #11680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11680 delay_slots=5 */
+ 11535 "00000001" // /* MW 5 */
+ 11536 "01000000" // /* MW 4 */
+ 11537 "11010000" // /* MW 3 */
+ 11538 "00010110" // /* MW 2 */
+ 11539 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11540 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11541 "11111011" // /* MW 3 */
+ 11542 "01100011" // /* MW 2 */
+ 11543 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11544 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11545 "10100000" // /* MW 5 */
+ 11546 "11001000" // /* MW 4 */
+ 11547 "11000100" // /* MW 3 */
+ 11548 "00000111" // /* MW 2 */
+ 11549 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11550 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11551 "01110000" // /* MW 7 */
+ 11552 "01100000" // /* MW 6 */
+ 11553 "00110111" // /* MW 5 */
+ 11554 "00000001" // /* MW 4 */
+ 11555 "00110000" // /* MW 3 */
+ 11556 "11000110" // /* MW 2 */
+ 11557 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11558 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "11010110" // /* MW 2 */
+ 11561 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11562 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "00010001" // /* MW 9 */
+ 11564 "10100000" // /* MW 8 */
+ 11565 "10110010" // /* MW 7 */
+ 11566 "11110011" // /* MW 6 */
+ 11567 "00000001" // /* MW 5 */
+ 11568 "00000000" // /* MW 4 */
+ 11569 "10110000" // /* MW 3 */
+ 11570 "10100011" // /* MW 2 */
+ 11571 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11572 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "00010001" // /* MW 9 */
+ 11574 "00110100" // /* MW 8 */
+ 11575 "00110010" // /* MW 7 */
+ 11576 "11110001" // /* MW 6 */
+ 11577 "00000001" // /* MW 5 */
+ 11578 "00000000" // /* MW 4 */
+ 11579 "01100000" // /* MW 3 */
+ 11580 "10010001" // /* MW 2 */
+ 11581 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11582 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "00010000" // /* MW 9 */
+ 11584 "00110010" // /* MW 8 */
+ 11585 "00110010" // /* MW 7 */
+ 11586 "11110001" // /* MW 6 */
+ 11587 "00000001" // /* MW 5 */
+ 11588 "00000000" // /* MW 4 */
+ 11589 "11100000" // /* MW 3 */
+ 11590 "11000000" // /* MW 2 */
+ 11591 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11593 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11594 "00000100" // JL #11040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 11595 "00000001" // /* MW 5 */
+ 11596 "00000000" // /* MW 4 */
+ 11597 "10010000" // /* MW 3 */
+ 11598 "00010101" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11603 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11604 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11605 "00110001" // /* MW 3 */
+ 11606 "00100000" // /* MW 2 */
+ 11607 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11608 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11609 "00000101" // /* MW 3 */
+ 11610 "00100000" // /* MW 2 */
+ 11611 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11612 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11613 "00010001" // /* MW 3 */
+ 11614 "00000110" // /* MW 2 */
+ 11615 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11616 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11617 "00010000" // /* MW 9 */
+ 11618 "00101000" // /* MW 8 */
+ 11619 "10110010" // /* MW 7 */
+ 11620 "11110000" // /* MW 6 */
+ 11621 "00000001" // /* MW 5 */
+ 11622 "00000000" // /* MW 4 */
+ 11623 "11010000" // /* MW 3 */
+ 11624 "11000010" // /* MW 2 */
+ 11625 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11626 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11627 "00010000" // /* MW 9 */
+ 11628 "00101010" // /* MW 8 */
+ 11629 "10110010" // /* MW 7 */
+ 11630 "11110001" // /* MW 6 */
+ 11631 "00000001" // /* MW 5 */
+ 11632 "00000000" // /* MW 4 */
+ 11633 "11010000" // /* MW 3 */
+ 11634 "11000110" // /* MW 2 */
+ 11635 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11636 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11637 "00010000" // /* MW 9 */
+ 11638 "00101110" // /* MW 8 */
+ 11639 "10110010" // /* MW 7 */
+ 11640 "11110000" // /* MW 6 */
+ 11641 "00000001" // /* MW 5 */
+ 11642 "00000000" // /* MW 4 */
+ 11643 "01010000" // /* MW 3 */
+ 11644 "11001011" // /* MW 2 */
+ 11645 "11101010" // /* MW 1 */
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+ 11648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11649 "00000000" // /* MW 1 */
+ 11650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11651 "00000000" // /* MW 1 */
+ 11652 "10000100" // J #11696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */
+ 11653 "00000000" // /* MW 5 */
+ 11654 "00000000" // /* MW 4 */
+ 11655 "11011000" // /* MW 3 */
+ 11656 "00010110" // /* MW 2 */
+ 11657 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11658 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11659 "11000000" // /* MW 5 */
+ 11660 "11001000" // /* MW 4 */
+ 11661 "11000100" // /* MW 3 */
+ 11662 "00000111" // /* MW 2 */
+ 11663 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11664 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11665 "00001111" // /* MW 3 */
+ 11666 "01100001" // /* MW 2 */
+ 11667 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11668 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11669 "01010001" // /* MW 3 */
+ 11670 "00000110" // /* MW 2 */
+ 11671 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11672 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11673 "00010001" // /* MW 3 */
+ 11674 "00000110" // /* MW 2 */
+ 11675 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11676 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11677 "00010001" // /* MW 3 */
+ 11678 "00000110" // /* MW 2 */
+ 11679 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11680 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11681 "10101000" // /* MW 5 */
+ 11682 "11001000" // /* MW 4 */
+ 11683 "11000110" // /* MW 3 */
+ 11684 "00000111" // /* MW 2 */
+ 11685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11686 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11687 "00010000" // /* MW 9 */
+ 11688 "00101110" // /* MW 8 */
+ 11689 "10110010" // /* MW 7 */
+ 11690 "11110000" // /* MW 6 */
+ 11691 "00000001" // /* MW 5 */
+ 11692 "00000000" // /* MW 4 */
+ 11693 "11110000" // /* MW 3 */
+ 11694 "00101100" // /* MW 2 */
+ 11695 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11696 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11697 "10000110" // /* MW 3 */
+ 11698 "01100111" // /* MW 2 */
+ 11699 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11700 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11701 "00010000" // /* MW 9 */
+ 11702 "00100000" // /* MW 8 */
+ 11703 "00110010" // /* MW 7 */
+ 11704 "11110001" // /* MW 6 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00000000" // /* MW 4 */
+ 11707 "11010000" // /* MW 3 */
+ 11708 "11101110" // /* MW 2 */
+ 11709 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11710 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00010110" // /* MW 3 */
+ 11712 "11111110" // /* MW 2 */
+ 11713 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11714 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00110110" // /* MW 3 */
+ 11716 "11111110" // /* MW 2 */
+ 11717 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11718 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "01010110" // /* MW 3 */
+ 11720 "00000110" // /* MW 2 */
+ 11721 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11722 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "01110110" // /* MW 3 */
+ 11724 "01000110" // /* MW 2 */
+ 11725 "00000000" // /* MW 1 */
+ 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11727 "00000000" // /* MW 1 */
+ 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11729 "00000000" // /* MW 1 */
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11731 "00000000" // /* MW 1 */
+ 11732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11733 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11734 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000010" // /* MW 3 */
+ 11736 "01100001" // /* MW 2 */
+ 11737 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11738 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11739 "00001110" // /* MW 5 */
+ 11740 "01000000" // /* MW 4 */
+ 11741 "00111001" // /* MW 3 */
+ 11742 "11000010" // /* MW 2 */
+ 11743 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 11744 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11745 "00010001" // /* MW 3 */
+ 11746 "00000110" // /* MW 2 */
+ 11747 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 11748 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11749 "11111101" // /* MW 3 */
+ 11750 "11100000" // /* MW 2 */
+ 11751 "00010111" // /* MW 1 */
+ 11752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11753 "00000000" // /* MW 1 */
+ 11754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11755 "00000000" // /* MW 1 */
+ 11756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11757 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 11758 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11759 "00001000" // /* MW 3 */
+ 11760 "11010011" // /* MW 2 */
+ 11761 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11762 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11763 "00000110" // /* MW 3 */
+ 11764 "01100111" // /* MW 2 */
+ 11765 "00011010" // /* MW 1 */
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11770 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "01110110" // /* MW 3 */
+ 11772 "11111111" // /* MW 2 */
+ 11773 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11774 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11775 "00110110" // /* MW 3 */
+ 11776 "11111110" // /* MW 2 */
+ 11777 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11778 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11779 "01010110" // /* MW 3 */
+ 11780 "11111110" // /* MW 2 */
+ 11781 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 11782 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11783 "01110110" // /* MW 3 */
+ 11784 "01010110" // /* MW 2 */
+ 11785 "00000010" // /* MW 1 */
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+ 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11791 "00000000" // /* MW 1 */
+ 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11793 "00000000" // /* MW 1 */
+ 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11795 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11796 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "00010010" // /* MW 3 */
+ 11798 "10100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11800 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11801 "00110001" // /* MW 3 */
+ 11802 "00000110" // /* MW 2 */
+ 11803 "00001010" // /* MW 1 */
+ 11804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11805 "00000000" // /* MW 1 */
+ 11806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11807 "00000000" // /* MW 1 */
+ 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11809 "00000000" // /* MW 1 */
+ 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 11812 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00001000" // /* MW 3 */
+ 11814 "11010011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 11816 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11817 "01111001" // /* MW 9 */
+ 11818 "01100000" // /* MW 8 */
+ 11819 "11001110" // /* MW 7 */
+ 11820 "00101001" // /* MW 6 */
+ 11821 "00000000" // /* MW 5 */
+ 11822 "00000001" // /* MW 4 */
+ 11823 "01100000" // /* MW 3 */
+ 11824 "00010001" // /* MW 2 */
+ 11825 "11010001" // /* MW 1 */
+ 11826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11827 "00000000" // /* MW 1 */
+ 11828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11829 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 11830 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11831 "00011001" // /* MW 3 */
+ 11832 "11101110" // /* MW 2 */
+ 11833 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 11834 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11835 "00111011" // /* MW 5 */
+ 11836 "11011000" // /* MW 4 */
+ 11837 "11011111" // /* MW 3 */
+ 11838 "11000110" // /* MW 2 */
+ 11839 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 11840 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11841 "10000001" // /* MW 5 */
+ 11842 "11011101" // /* MW 4 */
+ 11843 "11010110" // /* MW 3 */
+ 11844 "11010010" // /* MW 2 */
+ 11845 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 11846 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01010110" // /* MW 3 */
+ 11848 "01001110" // /* MW 2 */
+ 11849 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 11850 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00011110" // /* MW 3 */
+ 11852 "01011101" // /* MW 2 */
+ 11853 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11854 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11000000" // /* MW 3 */
+ 11856 "01100000" // /* MW 2 */
+ 11857 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11860 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "01110110" // /* MW 3 */
+ 11862 "00000110" // /* MW 2 */
+ 11863 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11865 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11866 "00000100" // JL #11184 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11184 delay_slots=5 */
+ 11867 "00000001" // /* MW 5 */
+ 11868 "00000000" // /* MW 4 */
+ 11869 "11011000" // /* MW 3 */
+ 11870 "00010101" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11872 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11873 "11000000" // /* MW 3 */
+ 11874 "11010100" // /* MW 2 */
+ 11875 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 11876 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11877 "00001101" // /* MW 3 */
+ 11878 "01100011" // /* MW 2 */
+ 11879 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 11880 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11881 "00001101" // /* MW 3 */
+ 11882 "00100001" // /* MW 2 */
+ 11883 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 11884 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11885 "01000001" // /* MW 3 */
+ 11886 "01101001" // /* MW 2 */
+ 11887 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "10101000" // /* MW 13 */
+ 11892 "11100010" // /* MW 12 */
+ 11893 "00110100" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 11904 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11905 "01111000" // /* MW 9 */
+ 11906 "11010000" // /* MW 8 */
+ 11907 "10110011" // /* MW 7 */
+ 11908 "00101000" // /* MW 6 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000001" // /* MW 4 */
+ 11911 "11010000" // /* MW 3 */
+ 11912 "11000110" // /* MW 2 */
+ 11913 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 11914 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11001100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+ 11920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11921 "00000000" // /* MW 1 */
+ 11922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11923 "00000000" // /* MW 1 */
+ 11924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11925 "00000000" // /* MW 1 */
+ 11926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11927 "00000000" // /* MW 1 */
+ 11928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11929 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 11930 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11931 "00001000" // /* MW 3 */
+ 11932 "01010001" // /* MW 2 */
+ 11933 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 11934 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11935 "00110110" // /* MW 3 */
+ 11936 "11110110" // /* MW 2 */
+ 11937 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 11938 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11939 "00011001" // /* MW 3 */
+ 11940 "11101101" // /* MW 2 */
+ 11941 "00000111" // /* MW 1 */
+ 11942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11943 "00000000" // /* MW 1 */
+ 11944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11945 "00000000" // /* MW 1 */
+ 11946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11947 "00000000" // /* MW 1 */
+ 11948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11949 "00000000" // /* MW 1 */
+ 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11951 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 11952 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "00010001" // /* MW 3 */
+ 11954 "00100011" // /* MW 2 */
+ 11955 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 11956 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11957 "01100011" // /* MW 5 */
+ 11958 "11101100" // /* MW 4 */
+ 11959 "11010011" // /* MW 3 */
+ 11960 "11000110" // /* MW 2 */
+ 11961 "01001010" // /* MW 1 */
+ 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11963 "00000000" // /* MW 1 */
+ 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11965 "00000000" // /* MW 1 */
+ 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11967 "00000000" // /* MW 1 */
+ 11968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11969 "00000000" // /* MW 1 */
+ 11970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11971 "00000000" // /* MW 1 */
+ 11972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 11974 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "00001000" // /* MW 3 */
+ 11976 "01010001" // /* MW 2 */
+ 11977 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 11978 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11979 "00010000" // /* MW 9 */
+ 11980 "00100000" // /* MW 8 */
+ 11981 "10110010" // /* MW 7 */
+ 11982 "11110000" // /* MW 6 */
+ 11983 "00000001" // /* MW 5 */
+ 11984 "00000000" // /* MW 4 */
+ 11985 "11010000" // /* MW 3 */
+ 11986 "11001110" // /* MW 2 */
+ 11987 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 11988 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "01010110" // /* MW 3 */
+ 11990 "00000110" // /* MW 2 */
+ 11991 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 11992 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "00110110" // /* MW 3 */
+ 11994 "00000110" // /* MW 2 */
+ 11995 "00000001" // /* MW 1 */
+ 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11997 "00000000" // /* MW 1 */
+ 11998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11999 "00000000" // /* MW 1 */
+ 12000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12001 "00000000" // /* MW 1 */
+ 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12003 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12004 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "00110001" // /* MW 3 */
+ 12006 "00100001" // /* MW 2 */
+ 12007 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12008 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12009 "00010001" // /* MW 3 */
+ 12010 "11100110" // /* MW 2 */
+ 12011 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12012 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12013 "00101000" // /* MW 3 */
+ 12014 "01100001" // /* MW 2 */
+ 12015 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12016 "10000100" // JNZ r16, #12048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12048 delay_slots=5 */
+ 12017 "00000001" // /* MW 5 */
+ 12018 "01000000" // /* MW 4 */
+ 12019 "10001000" // /* MW 3 */
+ 12020 "00010111" // /* MW 2 */
+ 12021 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12031 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12032 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12033 "00000001" // /* MW 3 */
+ 12034 "00100000" // /* MW 2 */
+ 12035 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12036 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12037 "11000001" // /* MW 11 */
+ 12038 "00001000" // /* MW 10 */
+ 12039 "10000011" // /* MW 9 */
+ 12040 "00000000" // /* MW 8 */
+ 12041 "00000000" // /* MW 7 */
+ 12042 "00000000" // /* MW 6 */
+ 12043 "00100000" // /* MW 5 */
+ 12044 "00000000" // /* MW 4 */
+ 12045 "11110000" // /* MW 3 */
+ 12046 "00101100" // /* MW 2 */
+ 12047 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12048 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12049 "00111001" // /* MW 3 */
+ 12050 "11110000" // /* MW 2 */
+ 12051 "00000111" // /* MW 1 */
+ 12052 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "11110001" // /* MW 3 */
+ 12054 "11111101" // /* MW 2 */
+ 12055 "00000111" // /* MW 1 */
+ 12056 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "10011001" // /* MW 3 */
+ 12058 "11110111" // /* MW 2 */
+ 12059 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12062 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12063 "11010001" // /* MW 3 */
+ 12064 "11111001" // /* MW 2 */
+ 12065 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12069 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12070 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12071 "00000000" // /* MW 3 */
+ 12072 "00101000" // /* MW 2 */
+ 12073 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12074 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12075 "00001011" // /* MW 3 */
+ 12076 "10001110" // /* MW 2 */
+ 12077 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12078 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12079 "00000001" // /* MW 5 */
+ 12080 "00000000" // /* MW 4 */
+ 12081 "00000000" // /* MW 3 */
+ 12082 "11111000" // /* MW 2 */
+ 12083 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12087 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12089 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12096 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12097 "00010000" // /* MW 9 */
+ 12098 "11000000" // /* MW 8 */
+ 12099 "10110011" // /* MW 7 */
+ 12100 "11110000" // /* MW 6 */
+ 12101 "00000001" // /* MW 5 */
+ 12102 "00000000" // /* MW 4 */
+ 12103 "11010000" // /* MW 3 */
+ 12104 "10000101" // /* MW 2 */
+ 12105 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12106 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12107 "01011000" // /* MW 9 */
+ 12108 "00000000" // /* MW 8 */
+ 12109 "00001000" // /* MW 7 */
+ 12110 "01001011" // /* MW 6 */
+ 12111 "00000000" // /* MW 5 */
+ 12112 "00000001" // /* MW 4 */
+ 12113 "11010000" // /* MW 3 */
+ 12114 "10000001" // /* MW 2 */
+ 12115 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12117 "00000001" // /* MW 5 */
+ 12118 "00000000" // /* MW 4 */
+ 12119 "00000000" // /* MW 3 */
+ 12120 "00001000" // /* MW 2 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12122 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12123 "00010001" // /* MW 9 */
+ 12124 "11000000" // /* MW 8 */
+ 12125 "10110011" // /* MW 7 */
+ 12126 "11110011" // /* MW 6 */
+ 12127 "00000001" // /* MW 5 */
+ 12128 "00000000" // /* MW 4 */
+ 12129 "10110000" // /* MW 3 */
+ 12130 "11110011" // /* MW 2 */
+ 12131 "11111110" // /* MW 1 */
+ 12132 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00111101" // /* MW 3 */
+ 12134 "11111100" // /* MW 2 */
+ 12135 "00001111" // /* MW 1 */
+ 12136 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "11110101" // /* MW 3 */
+ 12138 "11111001" // /* MW 2 */
+ 12139 "00001111" // /* MW 1 */
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12142 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12143 "00101001" // /* MW 3 */
+ 12144 "00011100" // /* MW 2 */
+ 12145 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12146 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12147 "00001001" // /* MW 3 */
+ 12148 "00011100" // /* MW 2 */
+ 12149 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12150 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12151 "00101110" // /* MW 3 */
+ 12152 "00000100" // /* MW 2 */
+ 12153 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12154 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12155 "00001110" // /* MW 3 */
+ 12156 "00010100" // /* MW 2 */
+ 12157 "00000000" // /* MW 1 */
+ 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12159 "00000000" // /* MW 1 */
+ 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12161 "00000000" // /* MW 1 */
+ 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12163 "00000000" // /* MW 1 */
+ 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12165 "00000000" // /* MW 1 */
+ 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12167 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12168 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12169 "00101001" // /* MW 3 */
+ 12170 "00000100" // /* MW 2 */
+ 12171 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12172 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12173 "00001001" // /* MW 3 */
+ 12174 "00010100" // /* MW 2 */
+ 12175 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12176 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12177 "00101010" // /* MW 3 */
+ 12178 "01011110" // /* MW 2 */
+ 12179 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12180 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12181 "01001010" // /* MW 3 */
+ 12182 "11101110" // /* MW 2 */
+ 12183 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12184 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12185 "00101010" // /* MW 3 */
+ 12186 "11101100" // /* MW 2 */
+ 12187 "00000111" // /* MW 1 */
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+ 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12193 "00000000" // /* MW 1 */
+ 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12195 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12196 "00000100" // JL #14224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */
+ 12197 "00000001" // /* MW 5 */
+ 12198 "00000000" // /* MW 4 */
+ 12199 "11001000" // /* MW 3 */
+ 12200 "00011011" // /* MW 2 */
+ 12201 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12202 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12203 "01000011" // /* MW 5 */
+ 12204 "10111110" // /* MW 4 */
+ 12205 "10111000" // /* MW 3 */
+ 12206 "11001010" // /* MW 2 */
+ 12207 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12208 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12209 "00010001" // /* MW 5 */
+ 12210 "11000010" // /* MW 4 */
+ 12211 "10110000" // /* MW 3 */
+ 12212 "10000110" // /* MW 2 */
+ 12213 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12214 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12215 "00010101" // /* MW 5 */
+ 12216 "11101111" // /* MW 4 */
+ 12217 "10110111" // /* MW 3 */
+ 12218 "01000010" // /* MW 2 */
+ 12219 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12220 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12221 "11110001" // /* MW 3 */
+ 12222 "00100010" // /* MW 2 */
+ 12223 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12224 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12225 "00000000" // /* MW 15 */
+ 12226 "00000000" // /* MW 14 */
+ 12227 "01111000" // /* MW 13 */
+ 12228 "10100101" // /* MW 12 */
+ 12229 "00000001" // /* MW 11 */
+ 12230 "10010000" // /* MW 10 */
+ 12231 "00001000" // /* MW 9 */
+ 12232 "00011110" // /* MW 8 */
+ 12233 "01011011" // /* MW 7 */
+ 12234 "00000001" // /* MW 6 */
+ 12235 "00100000" // /* MW 5 */
+ 12236 "00000000" // /* MW 4 */
+ 12237 "11110000" // /* MW 3 */
+ 12238 "00101100" // /* MW 2 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12240 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12241 "00000010" // /* MW 5 */
+ 12242 "01000000" // /* MW 4 */
+ 12243 "00100000" // /* MW 3 */
+ 12244 "11010010" // /* MW 2 */
+ 12245 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12246 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12247 "01000011" // /* MW 5 */
+ 12248 "01001000" // /* MW 4 */
+ 12249 "01011000" // /* MW 3 */
+ 12250 "11000101" // /* MW 2 */
+ 12251 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12252 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12253 "01101010" // /* MW 3 */
+ 12254 "11101110" // /* MW 2 */
+ 12255 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12256 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00110001" // /* MW 3 */
+ 12258 "11101100" // /* MW 2 */
+ 12259 "00000111" // /* MW 1 */
+ 12260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12261 "00000000" // /* MW 1 */
+ 12262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12263 "00000000" // /* MW 1 */
+ 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12265 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12266 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12267 "01000110" // /* MW 3 */
+ 12268 "11101001" // /* MW 2 */
+ 12269 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12270 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12271 "00001010" // /* MW 3 */
+ 12272 "00110111" // /* MW 2 */
+ 12273 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12274 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12275 "01100011" // /* MW 5 */
+ 12276 "11000110" // /* MW 4 */
+ 12277 "10111000" // /* MW 3 */
+ 12278 "01001110" // /* MW 2 */
+ 12279 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12280 "00111010" // ST r17, [sp, #-32]; JL #14224 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */
+ 12281 "01000001" // /* MW 9 */
+ 12282 "00000000" // /* MW 8 */
+ 12283 "00000000" // /* MW 7 */
+ 12284 "11110010" // /* MW 6 */
+ 12285 "00000110" // /* MW 5 */
+ 12286 "00000000" // /* MW 4 */
+ 12287 "10110000" // /* MW 3 */
+ 12288 "01000110" // /* MW 2 */
+ 12289 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12290 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12291 "00100010" // /* MW 3 */
+ 12292 "10101001" // /* MW 2 */
+ 12293 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12294 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12295 "00001010" // /* MW 3 */
+ 12296 "01110111" // /* MW 2 */
+ 12297 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12298 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12299 "00010001" // /* MW 3 */
+ 12300 "00100101" // /* MW 2 */
+ 12301 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12302 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12303 "01110000" // /* MW 3 */
+ 12304 "00100110" // /* MW 2 */
+ 12305 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12306 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12307 "01100000" // /* MW 13 */
+ 12308 "00101011" // /* MW 12 */
+ 12309 "00000000" // /* MW 11 */
+ 12310 "00001001" // /* MW 10 */
+ 12311 "10011000" // /* MW 9 */
+ 12312 "00111101" // /* MW 8 */
+ 12313 "00100010" // /* MW 7 */
+ 12314 "01000001" // /* MW 6 */
+ 12315 "00100100" // /* MW 5 */
+ 12316 "00000000" // /* MW 4 */
+ 12317 "11110000" // /* MW 3 */
+ 12318 "00101100" // /* MW 2 */
+ 12319 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12320 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12321 "01011000" // /* MW 9 */
+ 12322 "01000010" // /* MW 8 */
+ 12323 "00000000" // /* MW 7 */
+ 12324 "11001000" // /* MW 6 */
+ 12325 "00110111" // /* MW 5 */
+ 12326 "00111111" // /* MW 4 */
+ 12327 "00100000" // /* MW 3 */
+ 12328 "00001110" // /* MW 2 */
+ 12329 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12330 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12331 "01011000" // /* MW 9 */
+ 12332 "11111100" // /* MW 8 */
+ 12333 "00101001" // /* MW 7 */
+ 12334 "00001000" // /* MW 6 */
+ 12335 "10000000" // /* MW 5 */
+ 12336 "00000001" // /* MW 4 */
+ 12337 "00100000" // /* MW 3 */
+ 12338 "11000010" // /* MW 2 */
+ 12339 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12340 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12341 "01011000" // /* MW 9 */
+ 12342 "00000010" // /* MW 8 */
+ 12343 "10001000" // /* MW 7 */
+ 12344 "10001000" // /* MW 6 */
+ 12345 "01100000" // /* MW 5 */
+ 12346 "00000000" // /* MW 4 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "11011010" // /* MW 2 */
+ 12349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12350 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12351 "01011000" // /* MW 9 */
+ 12352 "00010111" // /* MW 8 */
+ 12353 "10001000" // /* MW 7 */
+ 12354 "00001011" // /* MW 6 */
+ 12355 "01010001" // /* MW 5 */
+ 12356 "00000000" // /* MW 4 */
+ 12357 "01010000" // /* MW 3 */
+ 12358 "01000101" // /* MW 2 */
+ 12359 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12360 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12361 "01011000" // /* MW 9 */
+ 12362 "00100000" // /* MW 8 */
+ 12363 "10000000" // /* MW 7 */
+ 12364 "01001000" // /* MW 6 */
+ 12365 "00100111" // /* MW 5 */
+ 12366 "00111111" // /* MW 4 */
+ 12367 "00100000" // /* MW 3 */
+ 12368 "01010110" // /* MW 2 */
+ 12369 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12370 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12371 "01011000" // /* MW 9 */
+ 12372 "00000001" // /* MW 8 */
+ 12373 "01001000" // /* MW 7 */
+ 12374 "11001011" // /* MW 6 */
+ 12375 "01110000" // /* MW 5 */
+ 12376 "00000001" // /* MW 4 */
+ 12377 "00100000" // /* MW 3 */
+ 12378 "01111010" // /* MW 2 */
+ 12379 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12380 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12381 "01011000" // /* MW 9 */
+ 12382 "11000000" // /* MW 8 */
+ 12383 "11101111" // /* MW 7 */
+ 12384 "00001011" // /* MW 6 */
+ 12385 "11010000" // /* MW 5 */
+ 12386 "00000101" // /* MW 4 */
+ 12387 "10000000" // /* MW 3 */
+ 12388 "11000000" // /* MW 2 */
+ 12389 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12390 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12391 "00100001" // /* MW 3 */
+ 12392 "00101000" // /* MW 2 */
+ 12393 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12394 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12395 "00000110" // /* MW 3 */
+ 12396 "11000111" // /* MW 2 */
+ 12397 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12398 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12399 "00000010" // /* MW 5 */
+ 12400 "00110110" // /* MW 4 */
+ 12401 "01010000" // /* MW 3 */
+ 12402 "11110001" // /* MW 2 */
+ 12403 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12404 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12405 "11110101" // /* MW 5 */
+ 12406 "00111111" // /* MW 4 */
+ 12407 "01001011" // /* MW 3 */
+ 12408 "00101000" // /* MW 2 */
+ 12409 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12410 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12411 "00011101" // /* MW 5 */
+ 12412 "00100000" // /* MW 4 */
+ 12413 "11110001" // /* MW 3 */
+ 12414 "11100001" // /* MW 2 */
+ 12415 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12416 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "01110000" // /* MW 3 */
+ 12418 "00101000" // /* MW 2 */
+ 12419 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12420 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12421 "00000001" // /* MW 5 */
+ 12422 "10100000" // /* MW 4 */
+ 12423 "10010000" // /* MW 3 */
+ 12424 "00000000" // /* MW 2 */
+ 12425 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12426 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12427 "00000001" // /* MW 5 */
+ 12428 "10110100" // /* MW 4 */
+ 12429 "10111101" // /* MW 3 */
+ 12430 "11100111" // /* MW 2 */
+ 12431 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12432 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12433 "00000010" // /* MW 5 */
+ 12434 "10100011" // /* MW 4 */
+ 12435 "10110000" // /* MW 3 */
+ 12436 "00001101" // /* MW 2 */
+ 12437 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12438 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12439 "11111111" // /* MW 5 */
+ 12440 "00110101" // /* MW 4 */
+ 12441 "10110000" // /* MW 3 */
+ 12442 "11001101" // /* MW 2 */
+ 12443 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12444 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12445 "00001111" // /* MW 3 */
+ 12446 "11001101" // /* MW 2 */
+ 12447 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12448 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12449 "00011111" // /* MW 3 */
+ 12450 "11011111" // /* MW 2 */
+ 12451 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12452 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12453 "11111111" // /* MW 5 */
+ 12454 "10110011" // /* MW 4 */
+ 12455 "11111001" // /* MW 3 */
+ 12456 "01101011" // /* MW 2 */
+ 12457 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12458 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12459 "00000111" // /* MW 3 */
+ 12460 "00110111" // /* MW 2 */
+ 12461 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12462 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12463 "11011111" // /* MW 5 */
+ 12464 "10010000" // /* MW 4 */
+ 12465 "00110111" // /* MW 3 */
+ 12466 "11010110" // /* MW 2 */
+ 12467 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12468 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12469 "01010010" // /* MW 3 */
+ 12470 "00111000" // /* MW 2 */
+ 12471 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12472 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12473 "00101101" // /* MW 3 */
+ 12474 "00100101" // /* MW 2 */
+ 12475 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12476 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12477 "00111111" // /* MW 5 */
+ 12478 "11001000" // /* MW 4 */
+ 12479 "00111000" // /* MW 3 */
+ 12480 "01001010" // /* MW 2 */
+ 12481 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12482 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12483 "11111011" // /* MW 5 */
+ 12484 "01110010" // /* MW 4 */
+ 12485 "00111111" // /* MW 3 */
+ 12486 "11110010" // /* MW 2 */
+ 12487 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12488 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12489 "00011111" // /* MW 5 */
+ 12490 "01110000" // /* MW 4 */
+ 12491 "00111001" // /* MW 3 */
+ 12492 "11110010" // /* MW 2 */
+ 12493 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12494 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12495 "11111011" // /* MW 5 */
+ 12496 "11001110" // /* MW 4 */
+ 12497 "00111001" // /* MW 3 */
+ 12498 "11001110" // /* MW 2 */
+ 12499 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12500 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12501 "11101010" // /* MW 5 */
+ 12502 "10110011" // /* MW 4 */
+ 12503 "10111001" // /* MW 3 */
+ 12504 "00110101" // /* MW 2 */
+ 12505 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12506 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12507 "01011011" // /* MW 5 */
+ 12508 "01111011" // /* MW 4 */
+ 12509 "00111001" // /* MW 3 */
+ 12510 "11111110" // /* MW 2 */
+ 12511 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12512 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12513 "11100010" // /* MW 5 */
+ 12514 "00110011" // /* MW 4 */
+ 12515 "11111001" // /* MW 3 */
+ 12516 "00100001" // /* MW 2 */
+ 12517 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12518 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12519 "00000100" // /* MW 5 */
+ 12520 "11110011" // /* MW 4 */
+ 12521 "00111111" // /* MW 3 */
+ 12522 "10000010" // /* MW 2 */
+ 12523 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12524 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12525 "01101101" // /* MW 3 */
+ 12526 "11111111" // /* MW 2 */
+ 12527 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12528 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12529 "11111111" // /* MW 5 */
+ 12530 "10111111" // /* MW 4 */
+ 12531 "00111001" // /* MW 3 */
+ 12532 "01100110" // /* MW 2 */
+ 12533 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12534 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12535 "11011011" // /* MW 5 */
+ 12536 "11000110" // /* MW 4 */
+ 12537 "00111000" // /* MW 3 */
+ 12538 "10000110" // /* MW 2 */
+ 12539 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12540 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12541 "11111111" // /* MW 5 */
+ 12542 "00110001" // /* MW 4 */
+ 12543 "00111001" // /* MW 3 */
+ 12544 "10100100" // /* MW 2 */
+ 12545 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12546 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12547 "11000011" // /* MW 5 */
+ 12548 "11011011" // /* MW 4 */
+ 12549 "00110011" // /* MW 3 */
+ 12550 "11011010" // /* MW 2 */
+ 12551 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12552 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12553 "01011011" // /* MW 5 */
+ 12554 "01000011" // /* MW 4 */
+ 12555 "00111000" // /* MW 3 */
+ 12556 "11001010" // /* MW 2 */
+ 12557 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12558 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12559 "01011011" // /* MW 5 */
+ 12560 "11111100" // /* MW 4 */
+ 12561 "00111001" // /* MW 3 */
+ 12562 "10011110" // /* MW 2 */
+ 12563 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12564 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12565 "11000001" // /* MW 5 */
+ 12566 "11011010" // /* MW 4 */
+ 12567 "00111110" // /* MW 3 */
+ 12568 "11001110" // /* MW 2 */
+ 12569 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12570 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12571 "11110010" // /* MW 5 */
+ 12572 "10111111" // /* MW 4 */
+ 12573 "00011110" // /* MW 3 */
+ 12574 "00100000" // /* MW 2 */
+ 12575 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12576 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12577 "10100011" // /* MW 5 */
+ 12578 "01000011" // /* MW 4 */
+ 12579 "00111000" // /* MW 3 */
+ 12580 "11011010" // /* MW 2 */
+ 12581 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12582 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12583 "01011001" // /* MW 9 */
+ 12584 "11111111" // /* MW 8 */
+ 12585 "00001111" // /* MW 7 */
+ 12586 "01101110" // /* MW 6 */
+ 12587 "01101101" // /* MW 5 */
+ 12588 "00011111" // /* MW 4 */
+ 12589 "00110000" // /* MW 3 */
+ 12590 "11000010" // /* MW 2 */
+ 12591 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12592 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12593 "10000001" // /* MW 5 */
+ 12594 "01101010" // /* MW 4 */
+ 12595 "00111110" // /* MW 3 */
+ 12596 "11001010" // /* MW 2 */
+ 12597 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12598 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12599 "11000011" // /* MW 5 */
+ 12600 "01010010" // /* MW 4 */
+ 12601 "00111010" // /* MW 3 */
+ 12602 "11101010" // /* MW 2 */
+ 12603 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12604 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12605 "00001000" // /* MW 11 */
+ 12606 "00010000" // /* MW 10 */
+ 12607 "01101101" // /* MW 9 */
+ 12608 "10110010" // /* MW 8 */
+ 12609 "00001000" // /* MW 7 */
+ 12610 "10101011" // /* MW 6 */
+ 12611 "01110001" // /* MW 5 */
+ 12612 "00011110" // /* MW 4 */
+ 12613 "00000111" // /* MW 3 */
+ 12614 "00010001" // /* MW 2 */
+ 12615 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12616 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12617 "01110001" // /* MW 3 */
+ 12618 "00011110" // /* MW 2 */
+ 12619 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12620 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12621 "11111011" // /* MW 5 */
+ 12622 "01010010" // /* MW 4 */
+ 12623 "00111000" // /* MW 3 */
+ 12624 "11000110" // /* MW 2 */
+ 12625 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12626 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12627 "10000011" // /* MW 5 */
+ 12628 "01000010" // /* MW 4 */
+ 12629 "00111100" // /* MW 3 */
+ 12630 "11000010" // /* MW 2 */
+ 12631 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12632 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12633 "11111011" // /* MW 5 */
+ 12634 "01010010" // /* MW 4 */
+ 12635 "00111001" // /* MW 3 */
+ 12636 "11000110" // /* MW 2 */
+ 12637 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12638 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12639 "10000011" // /* MW 5 */
+ 12640 "01000010" // /* MW 4 */
+ 12641 "00111100" // /* MW 3 */
+ 12642 "11000010" // /* MW 2 */
+ 12643 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12644 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12645 "01010001" // /* MW 3 */
+ 12646 "00011110" // /* MW 2 */
+ 12647 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12648 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12649 "00110001" // /* MW 3 */
+ 12650 "00011110" // /* MW 2 */
+ 12651 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12652 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12653 "00010001" // /* MW 3 */
+ 12654 "00001010" // /* MW 2 */
+ 12655 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12656 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12657 "00001010" // /* MW 3 */
+ 12658 "00000110" // /* MW 2 */
+ 12659 "00000111" // /* MW 1 */
+ 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12661 "00000000" // /* MW 1 */
+ 12662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12663 "00000000" // /* MW 1 */
+ 12664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12665 "00000000" // /* MW 1 */
+ 12666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12667 "00000000" // /* MW 1 */
+ 12668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12669 "00000000" // /* MW 1 */
+ 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12671 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12672 "10000100" // JZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */
+ 12673 "00000001" // /* MW 5 */
+ 12674 "00000000" // /* MW 4 */
+ 12675 "11010000" // /* MW 3 */
+ 12676 "00011000" // /* MW 2 */
+ 12677 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12678 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12679 "01100000" // /* MW 3 */
+ 12680 "00111011" // /* MW 2 */
+ 12681 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12682 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000000" // /* MW 5 */
+ 12684 "10100000" // /* MW 4 */
+ 12685 "00001001" // /* MW 3 */
+ 12686 "01111111" // /* MW 2 */
+ 12687 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12693 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12694 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12695 "00000001" // /* MW 9 */
+ 12696 "00100110" // /* MW 8 */
+ 12697 "00000000" // /* MW 7 */
+ 12698 "00000000" // /* MW 6 */
+ 12699 "01011011" // /* MW 5 */
+ 12700 "00000001" // /* MW 4 */
+ 12701 "11110000" // /* MW 3 */
+ 12702 "00101100" // /* MW 2 */
+ 12703 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12704 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12705 "00010000" // /* MW 9 */
+ 12706 "00110100" // /* MW 8 */
+ 12707 "00110010" // /* MW 7 */
+ 12708 "11110000" // /* MW 6 */
+ 12709 "00000001" // /* MW 5 */
+ 12710 "00000000" // /* MW 4 */
+ 12711 "00100000" // /* MW 3 */
+ 12712 "10000111" // /* MW 2 */
+ 12713 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12714 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12715 "11100010" // /* MW 5 */
+ 12716 "00000100" // /* MW 4 */
+ 12717 "01010000" // /* MW 3 */
+ 12718 "11000000" // /* MW 2 */
+ 12719 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12720 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12721 "11101001" // /* MW 5 */
+ 12722 "00000010" // /* MW 4 */
+ 12723 "00100001" // /* MW 3 */
+ 12724 "10000011" // /* MW 2 */
+ 12725 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12726 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12727 "00100101" // /* MW 5 */
+ 12728 "00000001" // /* MW 4 */
+ 12729 "00100000" // /* MW 3 */
+ 12730 "00111110" // /* MW 2 */
+ 12731 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12732 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00000001" // /* MW 5 */
+ 12734 "00000000" // /* MW 4 */
+ 12735 "00000000" // /* MW 3 */
+ 12736 "11111000" // /* MW 2 */
+ 12737 "11111111" // /* MW 1 */
+ 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12739 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12741 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12742 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12743 "00010111" // /* MW 3 */
+ 12744 "00000010" // /* MW 2 */
+ 12745 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12746 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12747 "01000001" // /* MW 5 */
+ 12748 "01110000" // /* MW 4 */
+ 12749 "00001111" // /* MW 3 */
+ 12750 "00000000" // /* MW 2 */
+ 12751 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12752 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12753 "00010110" // /* MW 3 */
+ 12754 "01000000" // /* MW 2 */
+ 12755 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12756 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12757 "11000000" // /* MW 3 */
+ 12758 "01100000" // /* MW 2 */
+ 12759 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12760 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12761 "00000001" // /* MW 3 */
+ 12762 "00000001" // /* MW 2 */
+ 12763 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 12767 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 12768 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12769 "11000000" // /* MW 3 */
+ 12770 "01010110" // /* MW 2 */
+ 12771 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 12772 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12773 "10101001" // /* MW 5 */
+ 12774 "00000001" // /* MW 4 */
+ 12775 "11011110" // /* MW 3 */
+ 12776 "10010011" // /* MW 2 */
+ 12777 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12778 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12779 "00000010" // /* MW 5 */
+ 12780 "11010001" // /* MW 4 */
+ 12781 "11010110" // /* MW 3 */
+ 12782 "10000011" // /* MW 2 */
+ 12783 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 12784 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12785 "10001010" // /* MW 3 */
+ 12786 "11101000" // /* MW 2 */
+ 12787 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 12788 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12789 "01000110" // /* MW 3 */
+ 12790 "11111101" // /* MW 2 */
+ 12791 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12792 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12793 "00100110" // /* MW 3 */
+ 12794 "00111101" // /* MW 2 */
+ 12795 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12796 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12797 "01000110" // /* MW 3 */
+ 12798 "11111111" // /* MW 2 */
+ 12799 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12800 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12801 "00100110" // /* MW 3 */
+ 12802 "00101111" // /* MW 2 */
+ 12803 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12804 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12805 "00000110" // /* MW 3 */
+ 12806 "00101101" // /* MW 2 */
+ 12807 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 12808 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12809 "01000110" // /* MW 3 */
+ 12810 "11111100" // /* MW 2 */
+ 12811 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12812 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12813 "00100110" // /* MW 3 */
+ 12814 "00111100" // /* MW 2 */
+ 12815 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12816 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12817 "01000110" // /* MW 3 */
+ 12818 "11111110" // /* MW 2 */
+ 12819 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12820 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12821 "00100110" // /* MW 3 */
+ 12822 "00101110" // /* MW 2 */
+ 12823 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12824 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12825 "00000110" // /* MW 3 */
+ 12826 "00101100" // /* MW 2 */
+ 12827 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 12828 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12829 "11000110" // /* MW 3 */
+ 12830 "11111100" // /* MW 2 */
+ 12831 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12832 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12833 "10100110" // /* MW 3 */
+ 12834 "00111100" // /* MW 2 */
+ 12835 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12836 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12837 "11000110" // /* MW 3 */
+ 12838 "11111110" // /* MW 2 */
+ 12839 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12840 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12841 "10100110" // /* MW 3 */
+ 12842 "00101110" // /* MW 2 */
+ 12843 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12844 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12845 "10000110" // /* MW 3 */
+ 12846 "00101100" // /* MW 2 */
+ 12847 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 12848 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12849 "11000110" // /* MW 3 */
+ 12850 "11111111" // /* MW 2 */
+ 12851 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 12852 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12853 "10100110" // /* MW 3 */
+ 12854 "00101111" // /* MW 2 */
+ 12855 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12856 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12857 "00010000" // /* MW 9 */
+ 12858 "00110100" // /* MW 8 */
+ 12859 "00110010" // /* MW 7 */
+ 12860 "11110010" // /* MW 6 */
+ 12861 "00000001" // /* MW 5 */
+ 12862 "00000000" // /* MW 4 */
+ 12863 "11010000" // /* MW 3 */
+ 12864 "11110000" // /* MW 2 */
+ 12865 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 12866 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12867 "10000001" // /* MW 5 */
+ 12868 "11000101" // /* MW 4 */
+ 12869 "01011000" // /* MW 3 */
+ 12870 "10011000" // /* MW 2 */
+ 12871 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12872 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "00010000" // /* MW 3 */
+ 12874 "00001111" // /* MW 2 */
+ 12875 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 12876 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12877 "01011000" // /* MW 11 */
+ 12878 "00000000" // /* MW 10 */
+ 12879 "01100000" // /* MW 9 */
+ 12880 "01101010" // /* MW 8 */
+ 12881 "00100000" // /* MW 7 */
+ 12882 "00000000" // /* MW 6 */
+ 12883 "01101000" // /* MW 5 */
+ 12884 "00111011" // /* MW 4 */
+ 12885 "01110000" // /* MW 3 */
+ 12886 "10000101" // /* MW 2 */
+ 12887 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12888 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12889 "01100000" // /* MW 13 */
+ 12890 "00001001" // /* MW 12 */
+ 12891 "01100010" // /* MW 11 */
+ 12892 "00001011" // /* MW 10 */
+ 12893 "00010000" // /* MW 9 */
+ 12894 "11100000" // /* MW 8 */
+ 12895 "00101101" // /* MW 7 */
+ 12896 "00000100" // /* MW 6 */
+ 12897 "11101001" // /* MW 5 */
+ 12898 "00111000" // /* MW 4 */
+ 12899 "11010000" // /* MW 3 */
+ 12900 "10111000" // /* MW 2 */
+ 12901 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12902 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12903 "01110010" // /* MW 9 */
+ 12904 "10010000" // /* MW 8 */
+ 12905 "10000000" // /* MW 7 */
+ 12906 "00000010" // /* MW 6 */
+ 12907 "01001011" // /* MW 5 */
+ 12908 "00001100" // /* MW 4 */
+ 12909 "11010001" // /* MW 3 */
+ 12910 "10110100" // /* MW 2 */
+ 12911 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12912 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12913 "01111110" // /* MW 9 */
+ 12914 "11000000" // /* MW 8 */
+ 12915 "11100001" // /* MW 7 */
+ 12916 "00000011" // /* MW 6 */
+ 12917 "10010000" // /* MW 5 */
+ 12918 "10101011" // /* MW 4 */
+ 12919 "11010001" // /* MW 3 */
+ 12920 "00110000" // /* MW 2 */
+ 12921 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 12922 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12923 "01011110" // /* MW 9 */
+ 12924 "10010000" // /* MW 8 */
+ 12925 "00000111" // /* MW 7 */
+ 12926 "00000010" // /* MW 6 */
+ 12927 "11110100" // /* MW 5 */
+ 12928 "11110000" // /* MW 4 */
+ 12929 "11010001" // /* MW 3 */
+ 12930 "00001010" // /* MW 2 */
+ 12931 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12932 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12933 "10000010" // /* MW 5 */
+ 12934 "00000000" // /* MW 4 */
+ 12935 "01010000" // /* MW 3 */
+ 12936 "00011110" // /* MW 2 */
+ 12937 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 12938 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12939 "00010000" // /* MW 11 */
+ 12940 "01111000" // /* MW 10 */
+ 12941 "01111001" // /* MW 9 */
+ 12942 "00001100" // /* MW 8 */
+ 12943 "00000000" // /* MW 7 */
+ 12944 "00000000" // /* MW 6 */
+ 12945 "01001011" // /* MW 5 */
+ 12946 "00010000" // /* MW 4 */
+ 12947 "11010110" // /* MW 3 */
+ 12948 "11000000" // /* MW 2 */
+ 12949 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 12950 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12951 "00010000" // /* MW 11 */
+ 12952 "10101000" // /* MW 10 */
+ 12953 "10111001" // /* MW 9 */
+ 12954 "00001101" // /* MW 8 */
+ 12955 "00000000" // /* MW 7 */
+ 12956 "00000000" // /* MW 6 */
+ 12957 "01001011" // /* MW 5 */
+ 12958 "00010000" // /* MW 4 */
+ 12959 "11010010" // /* MW 3 */
+ 12960 "10010010" // /* MW 2 */
+ 12961 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 12962 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12963 "00000101" // /* MW 5 */
+ 12964 "01100001" // /* MW 4 */
+ 12965 "10000100" // /* MW 3 */
+ 12966 "00010110" // /* MW 2 */
+ 12967 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 12968 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12969 "10001010" // /* MW 3 */
+ 12970 "00000000" // /* MW 2 */
+ 12971 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 12972 "10111010" // LDA r5, [p3]; MOVXM p3, #13200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12973 "00010000" // /* MW 9 */
+ 12974 "11001000" // /* MW 8 */
+ 12975 "10110001" // /* MW 7 */
+ 12976 "00001101" // /* MW 6 */
+ 12977 "00000000" // /* MW 5 */
+ 12978 "00000000" // /* MW 4 */
+ 12979 "11010000" // /* MW 3 */
+ 12980 "10010110" // /* MW 2 */
+ 12981 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 12982 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12983 "10101000" // /* MW 9 */
+ 12984 "00000001" // /* MW 8 */
+ 12985 "10001110" // /* MW 7 */
+ 12986 "00001010" // /* MW 6 */
+ 12987 "00010100" // /* MW 5 */
+ 12988 "00000000" // /* MW 4 */
+ 12989 "11110000" // /* MW 3 */
+ 12990 "00101100" // /* MW 2 */
+ 12991 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 12992 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12993 "00000000" // /* MW 15 */
+ 12994 "00000000" // /* MW 14 */
+ 12995 "01111000" // /* MW 13 */
+ 12996 "10111001" // /* MW 12 */
+ 12997 "00001110" // /* MW 11 */
+ 12998 "00001000" // /* MW 10 */
+ 12999 "00110110" // /* MW 9 */
+ 13000 "00000000" // /* MW 8 */
+ 13001 "01011011" // /* MW 7 */
+ 13002 "00000001" // /* MW 6 */
+ 13003 "00100000" // /* MW 5 */
+ 13004 "00000000" // /* MW 4 */
+ 13005 "00000000" // /* MW 3 */
+ 13006 "10010001" // /* MW 2 */
+ 13007 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13008 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13009 "01101010" // /* MW 15 */
+ 13010 "01100011" // /* MW 14 */
+ 13011 "10101100" // /* MW 13 */
+ 13012 "00000011" // /* MW 12 */
+ 13013 "00001110" // /* MW 11 */
+ 13014 "00000010" // /* MW 10 */
+ 13015 "11010100" // /* MW 9 */
+ 13016 "00001101" // /* MW 8 */
+ 13017 "01001011" // /* MW 7 */
+ 13018 "00010000" // /* MW 6 */
+ 13019 "00100000" // /* MW 5 */
+ 13020 "00000000" // /* MW 4 */
+ 13021 "11110000" // /* MW 3 */
+ 13022 "00101100" // /* MW 2 */
+ 13023 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13024 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13025 "00011010" // /* MW 15 */
+ 13026 "01001000" // /* MW 14 */
+ 13027 "11001100" // /* MW 13 */
+ 13028 "00111111" // /* MW 12 */
+ 13029 "10111001" // /* MW 11 */
+ 13030 "11011010" // /* MW 10 */
+ 13031 "00101111" // /* MW 9 */
+ 13032 "00000100" // /* MW 8 */
+ 13033 "01001011" // /* MW 7 */
+ 13034 "00010000" // /* MW 6 */
+ 13035 "00100101" // /* MW 5 */
+ 13036 "00000000" // /* MW 4 */
+ 13037 "11010000" // /* MW 3 */
+ 13038 "10100011" // /* MW 2 */
+ 13039 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13040 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13041 "01101110" // /* MW 9 */
+ 13042 "10000001" // /* MW 8 */
+ 13043 "10000100" // /* MW 7 */
+ 13044 "00000010" // /* MW 6 */
+ 13045 "11110100" // /* MW 5 */
+ 13046 "11110000" // /* MW 4 */
+ 13047 "01110001" // /* MW 3 */
+ 13048 "10110011" // /* MW 2 */
+ 13049 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13050 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13051 "00000001" // /* MW 9 */
+ 13052 "10001001" // /* MW 8 */
+ 13053 "10001010" // /* MW 7 */
+ 13054 "01000110" // /* MW 6 */
+ 13055 "00001011" // /* MW 5 */
+ 13056 "10011100" // /* MW 4 */
+ 13057 "11101010" // /* MW 3 */
+ 13058 "00111000" // /* MW 2 */
+ 13059 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13060 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13061 "00000001" // /* MW 9 */
+ 13062 "00110101" // /* MW 8 */
+ 13063 "10001001" // /* MW 7 */
+ 13064 "11000110" // /* MW 6 */
+ 13065 "10000110" // /* MW 5 */
+ 13066 "00110000" // /* MW 4 */
+ 13067 "01101010" // /* MW 3 */
+ 13068 "10110001" // /* MW 2 */
+ 13069 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13070 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13071 "00000110" // /* MW 3 */
+ 13072 "10001001" // /* MW 2 */
+ 13073 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13074 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13075 "10100001" // /* MW 7 */
+ 13076 "01001000" // /* MW 6 */
+ 13077 "10001100" // /* MW 5 */
+ 13078 "11000110" // /* MW 4 */
+ 13079 "10001110" // /* MW 3 */
+ 13080 "10110000" // /* MW 2 */
+ 13081 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13082 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13083 "10100001" // /* MW 7 */
+ 13084 "00110110" // /* MW 6 */
+ 13085 "10001010" // /* MW 5 */
+ 13086 "01000110" // /* MW 4 */
+ 13087 "00001111" // /* MW 3 */
+ 13088 "10011100" // /* MW 2 */
+ 13089 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13090 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13091 "00001110" // /* MW 3 */
+ 13092 "10001001" // /* MW 2 */
+ 13093 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13094 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13095 "11100001" // /* MW 7 */
+ 13096 "10010010" // /* MW 6 */
+ 13097 "10001011" // /* MW 5 */
+ 13098 "01000110" // /* MW 4 */
+ 13099 "00000011" // /* MW 3 */
+ 13100 "00011100" // /* MW 2 */
+ 13101 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13102 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13103 "11100001" // /* MW 7 */
+ 13104 "01010110" // /* MW 6 */
+ 13105 "10001000" // /* MW 5 */
+ 13106 "01000110" // /* MW 4 */
+ 13107 "00000111" // /* MW 3 */
+ 13108 "00011100" // /* MW 2 */
+ 13109 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13110 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13111 "01101110" // /* MW 9 */
+ 13112 "01000001" // /* MW 8 */
+ 13113 "00011000" // /* MW 7 */
+ 13114 "00000001" // /* MW 6 */
+ 13115 "00010000" // /* MW 5 */
+ 13116 "00000000" // /* MW 4 */
+ 13117 "11110000" // /* MW 3 */
+ 13118 "00101100" // /* MW 2 */
+ 13119 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13120 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13121 "01101010" // /* MW 15 */
+ 13122 "01100011" // /* MW 14 */
+ 13123 "01111100" // /* MW 13 */
+ 13124 "10100101" // /* MW 12 */
+ 13125 "00000001" // /* MW 11 */
+ 13126 "00000000" // /* MW 10 */
+ 13127 "00000000" // /* MW 9 */
+ 13128 "00000000" // /* MW 8 */
+ 13129 "01011011" // /* MW 7 */
+ 13130 "00000001" // /* MW 6 */
+ 13131 "00100000" // /* MW 5 */
+ 13132 "00000000" // /* MW 4 */
+ 13133 "11110000" // /* MW 3 */
+ 13134 "00101100" // /* MW 2 */
+ 13135 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13136 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13137 "00011010" // /* MW 15 */
+ 13138 "01001000" // /* MW 14 */
+ 13139 "01111100" // /* MW 13 */
+ 13140 "10100101" // /* MW 12 */
+ 13141 "00000001" // /* MW 11 */
+ 13142 "00000000" // /* MW 10 */
+ 13143 "00000000" // /* MW 9 */
+ 13144 "00000000" // /* MW 8 */
+ 13145 "01011011" // /* MW 7 */
+ 13146 "00000001" // /* MW 6 */
+ 13147 "00100000" // /* MW 5 */
+ 13148 "00000000" // /* MW 4 */
+ 13149 "11110000" // /* MW 3 */
+ 13150 "00101100" // /* MW 2 */
+ 13151 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13152 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13153 "01101110" // /* MW 9 */
+ 13154 "10000001" // /* MW 8 */
+ 13155 "10000100" // /* MW 7 */
+ 13156 "00000010" // /* MW 6 */
+ 13157 "10010000" // /* MW 5 */
+ 13158 "01110011" // /* MW 4 */
+ 13159 "11110100" // /* MW 3 */
+ 13160 "00001100" // /* MW 2 */
+ 13161 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13162 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13163 "00000001" // /* MW 7 */
+ 13164 "10001001" // /* MW 6 */
+ 13165 "10001010" // /* MW 5 */
+ 13166 "01000110" // /* MW 4 */
+ 13167 "00001011" // /* MW 3 */
+ 13168 "10011100" // /* MW 2 */
+ 13169 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13170 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13171 "00000001" // /* MW 7 */
+ 13172 "00110101" // /* MW 6 */
+ 13173 "10001001" // /* MW 5 */
+ 13174 "11000110" // /* MW 4 */
+ 13175 "10000110" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13178 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13179 "00000110" // /* MW 3 */
+ 13180 "10001001" // /* MW 2 */
+ 13181 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13182 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13183 "10100001" // /* MW 7 */
+ 13184 "01001000" // /* MW 6 */
+ 13185 "10001100" // /* MW 5 */
+ 13186 "01000110" // /* MW 4 */
+ 13187 "00001111" // /* MW 3 */
+ 13188 "10011100" // /* MW 2 */
+ 13189 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13190 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13191 "10100001" // /* MW 9 */
+ 13192 "00110110" // /* MW 8 */
+ 13193 "10001010" // /* MW 7 */
+ 13194 "11000010" // /* MW 6 */
+ 13195 "10001110" // /* MW 5 */
+ 13196 "10110000" // /* MW 4 */
+ 13197 "11110100" // /* MW 3 */
+ 13198 "00101100" // /* MW 2 */
+ 13199 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13200 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13201 "00011101" // /* MW 5 */
+ 13202 "00010010" // /* MW 4 */
+ 13203 "10001011" // /* MW 3 */
+ 13204 "00011110" // /* MW 2 */
+ 13205 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13206 "01011010" // MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13207 "11100001" // /* MW 9 */
+ 13208 "10010010" // /* MW 8 */
+ 13209 "10001011" // /* MW 7 */
+ 13210 "00000010" // /* MW 6 */
+ 13211 "01000100" // /* MW 5 */
+ 13212 "10110111" // /* MW 4 */
+ 13213 "00000001" // /* MW 3 */
+ 13214 "00000000" // /* MW 2 */
+ 13215 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13216 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13217 "11100001" // /* MW 11 */
+ 13218 "01010110" // /* MW 10 */
+ 13219 "10001000" // /* MW 9 */
+ 13220 "00000010" // /* MW 8 */
+ 13221 "00111111" // /* MW 7 */
+ 13222 "10001111" // /* MW 6 */
+ 13223 "00000001" // /* MW 5 */
+ 13224 "00000000" // /* MW 4 */
+ 13225 "01110000" // /* MW 3 */
+ 13226 "10000101" // /* MW 2 */
+ 13227 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13228 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13229 "01111111" // /* MW 3 */
+ 13230 "01110010" // /* MW 2 */
+ 13231 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13232 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13233 "10011011" // /* MW 3 */
+ 13234 "00011101" // /* MW 2 */
+ 13235 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13236 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13237 "01110100" // /* MW 3 */
+ 13238 "00011100" // /* MW 2 */
+ 13239 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13240 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13241 "10110100" // /* MW 3 */
+ 13242 "01011000" // /* MW 2 */
+ 13243 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13244 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13245 "10010110" // /* MW 3 */
+ 13246 "00010001" // /* MW 2 */
+ 13247 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13248 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13249 "00010110" // /* MW 3 */
+ 13250 "00010000" // /* MW 2 */
+ 13251 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13252 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13253 "01101100" // /* MW 3 */
+ 13254 "01010000" // /* MW 2 */
+ 13255 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13256 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13257 "00010100" // /* MW 3 */
+ 13258 "01010011" // /* MW 2 */
+ 13259 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13260 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13261 "01110000" // /* MW 7 */
+ 13262 "00110110" // /* MW 6 */
+ 13263 "10101000" // /* MW 5 */
+ 13264 "00000010" // /* MW 4 */
+ 13265 "01100000" // /* MW 3 */
+ 13266 "01000010" // /* MW 2 */
+ 13267 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13268 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13269 "00000011" // /* MW 3 */
+ 13270 "00011100" // /* MW 2 */
+ 13271 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13272 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13273 "01110000" // /* MW 7 */
+ 13274 "01000101" // /* MW 6 */
+ 13275 "10000000" // /* MW 5 */
+ 13276 "00000001" // /* MW 4 */
+ 13277 "01100000" // /* MW 3 */
+ 13278 "01010010" // /* MW 2 */
+ 13279 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13280 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13281 "01000001" // /* MW 7 */
+ 13282 "01101101" // /* MW 6 */
+ 13283 "10001100" // /* MW 5 */
+ 13284 "01000110" // /* MW 4 */
+ 13285 "00000111" // /* MW 3 */
+ 13286 "00011100" // /* MW 2 */
+ 13287 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13288 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13289 "01000001" // /* MW 7 */
+ 13290 "00000011" // /* MW 6 */
+ 13291 "10001001" // /* MW 5 */
+ 13292 "11000110" // /* MW 4 */
+ 13293 "10000010" // /* MW 3 */
+ 13294 "00110000" // /* MW 2 */
+ 13295 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13367 "00000101" // /* MW 5 */
+ 13368 "01100001" // /* MW 4 */
+ 13369 "11110100" // /* MW 3 */
+ 13370 "00101100" // /* MW 2 */
+ 13371 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13372 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13373 "01000001" // /* MW 3 */
+ 13374 "01101101" // /* MW 2 */
+ 13375 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "00011010" // /* MW 15 */
+ 13378 "01001000" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13392 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13393 "01101000" // /* MW 11 */
+ 13394 "10000001" // /* MW 10 */
+ 13395 "10000100" // /* MW 9 */
+ 13396 "00000010" // /* MW 8 */
+ 13397 "00100111" // /* MW 7 */
+ 13398 "00000100" // /* MW 6 */
+ 13399 "00100000" // /* MW 5 */
+ 13400 "11100111" // /* MW 4 */
+ 13401 "11111000" // /* MW 3 */
+ 13402 "00001100" // /* MW 2 */
+ 13403 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13404 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13405 "00000001" // /* MW 7 */
+ 13406 "10001001" // /* MW 6 */
+ 13407 "10001010" // /* MW 5 */
+ 13408 "01000110" // /* MW 4 */
+ 13409 "00001011" // /* MW 3 */
+ 13410 "10011100" // /* MW 2 */
+ 13411 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13412 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13413 "00000001" // /* MW 7 */
+ 13414 "00110101" // /* MW 6 */
+ 13415 "10001001" // /* MW 5 */
+ 13416 "11000110" // /* MW 4 */
+ 13417 "10000110" // /* MW 3 */
+ 13418 "00110000" // /* MW 2 */
+ 13419 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13420 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13421 "00000110" // /* MW 3 */
+ 13422 "10001001" // /* MW 2 */
+ 13423 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13424 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13425 "10100001" // /* MW 7 */
+ 13426 "01001000" // /* MW 6 */
+ 13427 "10001100" // /* MW 5 */
+ 13428 "01000110" // /* MW 4 */
+ 13429 "00001111" // /* MW 3 */
+ 13430 "10011100" // /* MW 2 */
+ 13431 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13432 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13433 "10100001" // /* MW 7 */
+ 13434 "00110110" // /* MW 6 */
+ 13435 "10001010" // /* MW 5 */
+ 13436 "11000110" // /* MW 4 */
+ 13437 "10001110" // /* MW 3 */
+ 13438 "10110000" // /* MW 2 */
+ 13439 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13440 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13441 "00001110" // /* MW 3 */
+ 13442 "10001001" // /* MW 2 */
+ 13443 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13444 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13445 "11100001" // /* MW 3 */
+ 13446 "10010010" // /* MW 2 */
+ 13447 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13448 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13449 "11100001" // /* MW 3 */
+ 13450 "01010110" // /* MW 2 */
+ 13451 "10001000" // /* MW 1 */
+ 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13453 "00000000" // /* MW 1 */
+ 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13455 "00000000" // /* MW 1 */
+ 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13457 "00000000" // /* MW 1 */
+ 13458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13459 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13460 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13461 "10010110" // /* MW 3 */
+ 13462 "00010001" // /* MW 2 */
+ 13463 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13464 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13465 "00000000" // /* MW 5 */
+ 13466 "01010000" // /* MW 4 */
+ 13467 "11000000" // /* MW 3 */
+ 13468 "00000010" // /* MW 2 */
+ 13469 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13470 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13471 "01101100" // /* MW 3 */
+ 13472 "01010000" // /* MW 2 */
+ 13473 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13474 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13475 "00010100" // /* MW 3 */
+ 13476 "01010011" // /* MW 2 */
+ 13477 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13478 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13479 "01101100" // /* MW 3 */
+ 13480 "01010000" // /* MW 2 */
+ 13481 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13482 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13483 "00010011" // /* MW 3 */
+ 13484 "10001010" // /* MW 2 */
+ 13485 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13486 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13487 "10010011" // /* MW 3 */
+ 13488 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13489 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13504 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13505 "10000000" // /* MW 5 */
+ 13506 "11001000" // /* MW 4 */
+ 13507 "11001000" // /* MW 3 */
+ 13508 "00000111" // /* MW 2 */
+ 13509 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13510 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13511 "01000001" // /* MW 5 */
+ 13512 "00101111" // /* MW 4 */
+ 13513 "11010000" // /* MW 3 */
+ 13514 "11000010" // /* MW 2 */
+ 13515 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13516 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13517 "00000001" // /* MW 5 */
+ 13518 "00000000" // /* MW 4 */
+ 13519 "00000000" // /* MW 3 */
+ 13520 "00010000" // /* MW 2 */
+ 13521 "00000000" // /* MW 1 */
+ 13522 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13523 "01110000" // /* MW 7 */
+ 13524 "01110000" // /* MW 6 */
+ 13525 "00101101" // /* MW 5 */
+ 13526 "00000010" // /* MW 4 */
+ 13527 "10110000" // /* MW 3 */
+ 13528 "00111010" // /* MW 2 */
+ 13529 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13530 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13531 "01110000" // /* MW 7 */
+ 13532 "11110000" // /* MW 6 */
+ 13533 "10101000" // /* MW 5 */
+ 13534 "00000001" // /* MW 4 */
+ 13535 "10110000" // /* MW 3 */
+ 13536 "10110110" // /* MW 2 */
+ 13537 "11111111" // /* MW 1 */
+ 13538 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13539 "00011101" // /* MW 3 */
+ 13540 "11101100" // /* MW 2 */
+ 13541 "00001111" // /* MW 1 */
+ 13542 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13543 "10011101" // /* MW 3 */
+ 13544 "11110111" // /* MW 2 */
+ 13545 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13546 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13547 "01110000" // /* MW 7 */
+ 13548 "01100000" // /* MW 6 */
+ 13549 "11001010" // /* MW 5 */
+ 13550 "00000001" // /* MW 4 */
+ 13551 "10110000" // /* MW 3 */
+ 13552 "00000010" // /* MW 2 */
+ 13553 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13554 "10000100" // JNZ r16, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */
+ 13555 "00000001" // /* MW 5 */
+ 13556 "01000000" // /* MW 4 */
+ 13557 "10111000" // /* MW 3 */
+ 13558 "00011010" // /* MW 2 */
+ 13559 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13560 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13561 "11000000" // /* MW 3 */
+ 13562 "11010110" // /* MW 2 */
+ 13563 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13564 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13565 "10010000" // /* MW 3 */
+ 13566 "01100010" // /* MW 2 */
+ 13567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13568 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13569 "11111011" // /* MW 3 */
+ 13570 "01100011" // /* MW 2 */
+ 13571 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13572 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13573 "10100000" // /* MW 5 */
+ 13574 "11001000" // /* MW 4 */
+ 13575 "11000110" // /* MW 3 */
+ 13576 "00000111" // /* MW 2 */
+ 13577 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13578 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13579 "00110001" // /* MW 3 */
+ 13580 "00000110" // /* MW 2 */
+ 13581 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13582 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13583 "00010001" // /* MW 9 */
+ 13584 "00110100" // /* MW 8 */
+ 13585 "10110010" // /* MW 7 */
+ 13586 "11110000" // /* MW 6 */
+ 13587 "00000001" // /* MW 5 */
+ 13588 "00000000" // /* MW 4 */
+ 13589 "01100000" // /* MW 3 */
+ 13590 "10010001" // /* MW 2 */
+ 13591 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13592 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13593 "00010000" // /* MW 11 */
+ 13594 "00110010" // /* MW 10 */
+ 13595 "10110010" // /* MW 9 */
+ 13596 "11110000" // /* MW 8 */
+ 13597 "00000001" // /* MW 7 */
+ 13598 "00000000" // /* MW 6 */
+ 13599 "10001011" // /* MW 5 */
+ 13600 "10001000" // /* MW 4 */
+ 13601 "11100000" // /* MW 3 */
+ 13602 "11000000" // /* MW 2 */
+ 13603 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13606 "00000100" // JL #12096 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12096 delay_slots=5 */
+ 13607 "00000001" // /* MW 5 */
+ 13608 "00000000" // /* MW 4 */
+ 13609 "10100000" // /* MW 3 */
+ 13610 "00010111" // /* MW 2 */
+ 13611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13617 "00110001" // /* MW 3 */
+ 13618 "00100000" // /* MW 2 */
+ 13619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13621 "00000101" // /* MW 3 */
+ 13622 "00100000" // /* MW 2 */
+ 13623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13624 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13625 "01110000" // /* MW 7 */
+ 13626 "10100101" // /* MW 6 */
+ 13627 "00000001" // /* MW 5 */
+ 13628 "00000000" // /* MW 4 */
+ 13629 "00110000" // /* MW 3 */
+ 13630 "11000010" // /* MW 2 */
+ 13631 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13632 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13633 "00000000" // /* MW 7 */
+ 13634 "10000010" // /* MW 6 */
+ 13635 "00110011" // /* MW 5 */
+ 13636 "00000001" // /* MW 4 */
+ 13637 "01100000" // /* MW 3 */
+ 13638 "10010001" // /* MW 2 */
+ 13639 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13640 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13641 "00111010" // /* MW 3 */
+ 13642 "00000110" // /* MW 2 */
+ 13643 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13644 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13645 "00010000" // /* MW 9 */
+ 13646 "00110000" // /* MW 8 */
+ 13647 "00110010" // /* MW 7 */
+ 13648 "11110001" // /* MW 6 */
+ 13649 "00000001" // /* MW 5 */
+ 13650 "00000000" // /* MW 4 */
+ 13651 "01010000" // /* MW 3 */
+ 13652 "11000011" // /* MW 2 */
+ 13653 "01000100" // /* MW 1 */
+ 13654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13655 "00000000" // /* MW 1 */
+ 13656 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */
+ 13657 "00000000" // /* MW 5 */
+ 13658 "00000000" // /* MW 4 */
+ 13659 "11000000" // /* MW 3 */
+ 13660 "00011010" // /* MW 2 */
+ 13661 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13662 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13663 "10110000" // /* MW 5 */
+ 13664 "11001000" // /* MW 4 */
+ 13665 "11000110" // /* MW 3 */
+ 13666 "00000111" // /* MW 2 */
+ 13667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13672 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13673 "00110001" // /* MW 3 */
+ 13674 "00000110" // /* MW 2 */
+ 13675 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13676 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00010001" // /* MW 3 */
+ 13678 "00000110" // /* MW 2 */
+ 13679 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13680 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13681 "00000000" // /* MW 15 */
+ 13682 "00000000" // /* MW 14 */
+ 13683 "00010000" // /* MW 13 */
+ 13684 "00101100" // /* MW 12 */
+ 13685 "10110010" // /* MW 11 */
+ 13686 "11110001" // /* MW 10 */
+ 13687 "00000001" // /* MW 9 */
+ 13688 "00000000" // /* MW 8 */
+ 13689 "01011011" // /* MW 7 */
+ 13690 "00000001" // /* MW 6 */
+ 13691 "00100000" // /* MW 5 */
+ 13692 "00000000" // /* MW 4 */
+ 13693 "11110000" // /* MW 3 */
+ 13694 "00101100" // /* MW 2 */
+ 13695 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13696 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "10000110" // /* MW 3 */
+ 13698 "01100111" // /* MW 2 */
+ 13699 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13700 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13701 "00010000" // /* MW 9 */
+ 13702 "00101000" // /* MW 8 */
+ 13703 "00110010" // /* MW 7 */
+ 13704 "11110010" // /* MW 6 */
+ 13705 "00000001" // /* MW 5 */
+ 13706 "00000000" // /* MW 4 */
+ 13707 "11010000" // /* MW 3 */
+ 13708 "11101110" // /* MW 2 */
+ 13709 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13710 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13711 "00010110" // /* MW 3 */
+ 13712 "11111110" // /* MW 2 */
+ 13713 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13714 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13715 "00110110" // /* MW 3 */
+ 13716 "11111110" // /* MW 2 */
+ 13717 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13718 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13719 "01010110" // /* MW 3 */
+ 13720 "01000110" // /* MW 2 */
+ 13721 "00000010" // /* MW 1 */
+ 13722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13723 "00000000" // /* MW 1 */
+ 13724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13725 "00000000" // /* MW 1 */
+ 13726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13727 "00000000" // /* MW 1 */
+ 13728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13729 "00000000" // /* MW 1 */
+ 13730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13731 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13733 "00000010" // /* MW 3 */
+ 13734 "01100001" // /* MW 2 */
+ 13735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13736 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13737 "00010001" // /* MW 3 */
+ 13738 "00000110" // /* MW 2 */
+ 13739 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13740 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13741 "11111101" // /* MW 3 */
+ 13742 "11100000" // /* MW 2 */
+ 13743 "00010111" // /* MW 1 */
+ 13744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13745 "00000000" // /* MW 1 */
+ 13746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13747 "00000000" // /* MW 1 */
+ 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13749 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 13750 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13751 "00001000" // /* MW 3 */
+ 13752 "10010011" // /* MW 2 */
+ 13753 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 13754 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13755 "00010000" // /* MW 9 */
+ 13756 "00100000" // /* MW 8 */
+ 13757 "10110010" // /* MW 7 */
+ 13758 "11110011" // /* MW 6 */
+ 13759 "00000001" // /* MW 5 */
+ 13760 "00000000" // /* MW 4 */
+ 13761 "00000000" // /* MW 3 */
+ 13762 "00101111" // /* MW 2 */
+ 13763 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 13764 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13765 "11000001" // /* MW 5 */
+ 13766 "00101011" // /* MW 4 */
+ 13767 "00101000" // /* MW 3 */
+ 13768 "00000000" // /* MW 2 */
+ 13769 "00000110" // /* MW 1 */
+ 13770 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13771 "01011010" // /* MW 3 */
+ 13772 "01101000" // /* MW 2 */
+ 13773 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 13774 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13775 "10000001" // /* MW 5 */
+ 13776 "00101001" // /* MW 4 */
+ 13777 "00100111" // /* MW 3 */
+ 13778 "11010011" // /* MW 2 */
+ 13779 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 13780 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13781 "00110110" // /* MW 3 */
+ 13782 "00000110" // /* MW 2 */
+ 13783 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 13784 "10111010" // LDA r16, [p3]; MOVXM p3, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13785 "00010000" // /* MW 9 */
+ 13786 "11000000" // /* MW 8 */
+ 13787 "10110011" // /* MW 7 */
+ 13788 "11110001" // /* MW 6 */
+ 13789 "00000001" // /* MW 5 */
+ 13790 "00000000" // /* MW 4 */
+ 13791 "11010000" // /* MW 3 */
+ 13792 "11000010" // /* MW 2 */
+ 13793 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 13794 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "01010110" // /* MW 3 */
+ 13796 "00000110" // /* MW 2 */
+ 13797 "00000111" // /* MW 1 */
+ 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13799 "00000000" // /* MW 1 */
+ 13800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13801 "00000000" // /* MW 1 */
+ 13802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13803 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 13804 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13805 "01110110" // /* MW 3 */
+ 13806 "00000110" // /* MW 2 */
+ 13807 "00000101" // /* MW 1 */
+ 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13809 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 13810 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13811 "00001111" // /* MW 3 */
+ 13812 "01100001" // /* MW 2 */
+ 13813 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 13814 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13815 "00000111" // /* MW 3 */
+ 13816 "10100010" // /* MW 2 */
+ 13817 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 13818 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13819 "11111101" // /* MW 3 */
+ 13820 "00100000" // /* MW 2 */
+ 13821 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 13822 "00000100" // JL #12768 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12768 delay_slots=5 */
+ 13823 "00000001" // /* MW 5 */
+ 13824 "00000000" // /* MW 4 */
+ 13825 "11110000" // /* MW 3 */
+ 13826 "00011000" // /* MW 2 */
+ 13827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 13828 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13829 "00110001" // /* MW 3 */
+ 13830 "00000110" // /* MW 2 */
+ 13831 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 13832 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13833 "11000001" // /* MW 3 */
+ 13834 "01001001" // /* MW 2 */
+ 13835 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 13836 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13837 "00100101" // /* MW 3 */
+ 13838 "10110100" // /* MW 2 */
+ 13839 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 13840 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13841 "00010101" // /* MW 3 */
+ 13842 "10111011" // /* MW 2 */
+ 13843 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 13844 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13845 "11000001" // /* MW 11 */
+ 13846 "10001010" // /* MW 10 */
+ 13847 "11011111" // /* MW 9 */
+ 13848 "00000011" // /* MW 8 */
+ 13849 "00000000" // /* MW 7 */
+ 13850 "00000000" // /* MW 6 */
+ 13851 "00100000" // /* MW 5 */
+ 13852 "00000000" // /* MW 4 */
+ 13853 "11110000" // /* MW 3 */
+ 13854 "00101100" // /* MW 2 */
+ 13855 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 13856 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13857 "00001010" // /* MW 3 */
+ 13858 "01100111" // /* MW 2 */
+ 13859 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 13860 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13861 "00010110" // /* MW 3 */
+ 13862 "00000110" // /* MW 2 */
+ 13863 "00000010" // /* MW 1 */
+ 13864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13865 "00000000" // /* MW 1 */
+ 13866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13867 "00000000" // /* MW 1 */
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+ 13872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13873 "00000000" // /* MW 1 */
+ 13874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13875 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 13876 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "11111000" // /* MW 3 */
+ 13878 "00010000" // /* MW 2 */
+ 13879 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 13880 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13881 "00010000" // /* MW 9 */
+ 13882 "00110000" // /* MW 8 */
+ 13883 "10110010" // /* MW 7 */
+ 13884 "11110000" // /* MW 6 */
+ 13885 "00000001" // /* MW 5 */
+ 13886 "00000000" // /* MW 4 */
+ 13887 "11010000" // /* MW 3 */
+ 13888 "11000010" // /* MW 2 */
+ 13889 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 13890 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13891 "01010110" // /* MW 3 */
+ 13892 "00000110" // /* MW 2 */
+ 13893 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 13894 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13895 "00110110" // /* MW 3 */
+ 13896 "00000110" // /* MW 2 */
+ 13897 "00000111" // /* MW 1 */
+ 13898 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13899 "10011001" // /* MW 3 */
+ 13900 "11110100" // /* MW 2 */
+ 13901 "00000111" // /* MW 1 */
+ 13902 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13903 "11010001" // /* MW 3 */
+ 13904 "11111001" // /* MW 2 */
+ 13905 "00000111" // /* MW 1 */
+ 13906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13907 "00000000" // /* MW 1 */
+ 13908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13909 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 13910 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13911 "00000001" // /* MW 3 */
+ 13912 "11100001" // /* MW 2 */
+ 13913 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 13914 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13915 "00010001" // /* MW 3 */
+ 13916 "11100110" // /* MW 2 */
+ 13917 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 13918 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13919 "00101000" // /* MW 3 */
+ 13920 "01100001" // /* MW 2 */
+ 13921 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 13922 "10000100" // JNZ r16, #13952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13952 delay_slots=5 */
+ 13923 "00000001" // /* MW 5 */
+ 13924 "01000000" // /* MW 4 */
+ 13925 "01000000" // /* MW 3 */
+ 13926 "00011011" // /* MW 2 */
+ 13927 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 13928 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00000001" // /* MW 3 */
+ 13930 "00110000" // /* MW 2 */
+ 13931 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13939 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 13940 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13941 "11000001" // /* MW 11 */
+ 13942 "10001000" // /* MW 10 */
+ 13943 "10000011" // /* MW 9 */
+ 13944 "00000011" // /* MW 8 */
+ 13945 "00000000" // /* MW 7 */
+ 13946 "00000000" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 13952 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13953 "01000001" // /* MW 5 */
+ 13954 "11101101" // /* MW 4 */
+ 13955 "00101110" // /* MW 3 */
+ 13956 "10110110" // /* MW 2 */
+ 13957 "11111111" // /* MW 1 */
+ 13958 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13959 "11110001" // /* MW 3 */
+ 13960 "11110001" // /* MW 2 */
+ 13961 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 13962 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13963 "00000000" // /* MW 3 */
+ 13964 "00101000" // /* MW 2 */
+ 13965 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 13966 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13967 "00000001" // /* MW 5 */
+ 13968 "00000000" // /* MW 4 */
+ 13969 "00000000" // /* MW 3 */
+ 13970 "11110000" // /* MW 2 */
+ 13971 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13977 "00000000" // /* MW 1 */
+.delay_slot
+ 13978 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13979 "11000000" // /* MW 3 */
+ 13980 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 13981 "00011111" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 20 first
+.src_ref 0 "0_0_reloadable3.cc" 22 79
+.function_start
+ 13984 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13985 "11000000" // /* MW 3 */
+ 13986 "01100000" // /* MW 2 */
+ 13987 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 22 79 first
+ 13988 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00011110" // /* MW 3 */
+ 13990 "00011100" // /* MW 2 */
+ 13991 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 23 79 first
+ 13992 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "10011110" // /* MW 3 */
+ 13994 "00101100" // /* MW 2 */
+ 13995 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 25 81 first
+ 13996 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "10011110" // /* MW 3 */
+ 13998 "11110101" // /* MW 2 */
+ 13999 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 24 47 first
+ 14000 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14001 "00011110" // /* MW 3 */
+ 14002 "00000101" // /* MW 2 */
+ 14003 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 21 4 first
+.tail_call
+ 14004 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 14005 "00000000" // /* MW 5 */
+ 14006 "00000000" // /* MW 4 */
+ 14007 "01110000" // /* MW 3 */
+ 14008 "00001101" // /* MW 2 */
+ 14009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14015 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14017 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 14019 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 29 first
+.src_ref 0 "0_0_reloadable3.cc" 31 79
+.function_start
+ 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14033 "11000000" // /* MW 3 */
+ 14034 "01100000" // /* MW 2 */
+ 14035 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 31 79 first
+ 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00011110" // /* MW 3 */
+ 14038 "00101100" // /* MW 2 */
+ 14039 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 33 81 first
+ 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14041 "00011110" // /* MW 3 */
+ 14042 "11110101" // /* MW 2 */
+ 14043 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 32 47 first
+ 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14045 "10011110" // /* MW 3 */
+ 14046 "00000100" // /* MW 2 */
+ 14047 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 30 4 first
+.tail_call
+ 14048 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 14049 "00000000" // /* MW 5 */
+ 14050 "00000000" // /* MW 4 */
+ 14051 "00011000" // /* MW 3 */
+ 14052 "00010000" // /* MW 2 */
+ 14053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14061 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 14063 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 37 first
+.src_ref 0 "0_0_reloadable3.cc" 39 79
+.function_start
+ 14064 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14065 "11000000" // /* MW 3 */
+ 14066 "01100000" // /* MW 2 */
+ 14067 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 39 79 first
+ 14068 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14069 "00011110" // /* MW 3 */
+ 14070 "00101100" // /* MW 2 */
+ 14071 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 41 81 first
+ 14072 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14073 "00011110" // /* MW 3 */
+ 14074 "11110101" // /* MW 2 */
+ 14075 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 40 47 first
+ 14076 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14077 "10011110" // /* MW 3 */
+ 14078 "00000100" // /* MW 2 */
+ 14079 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 38 4 first
+.tail_call
+ 14080 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 14081 "00000000" // /* MW 5 */
+ 14082 "00000000" // /* MW 4 */
+ 14083 "11001000" // /* MW 3 */
+ 14084 "00010001" // /* MW 2 */
+ 14085 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14087 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14089 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 14095 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 45 first
+.src_ref 0 "0_0_reloadable3.cc" 47 79
+.function_start
+ 14096 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "11000000" // /* MW 3 */
+ 14098 "01100000" // /* MW 2 */
+ 14099 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 47 79 first
+ 14100 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14101 "00011110" // /* MW 3 */
+ 14102 "00101100" // /* MW 2 */
+ 14103 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 49 81 first
+ 14104 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14105 "00011110" // /* MW 3 */
+ 14106 "11110101" // /* MW 2 */
+ 14107 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 48 47 first
+ 14108 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14109 "10011110" // /* MW 3 */
+ 14110 "00000100" // /* MW 2 */
+ 14111 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 46 4 first
+.tail_call
+ 14112 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 14113 "00000000" // /* MW 5 */
+ 14114 "00000000" // /* MW 4 */
+ 14115 "10001000" // /* MW 3 */
+ 14116 "00010100" // /* MW 2 */
+ 14117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 14127 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 53 first
+.src_ref 0 "0_0_reloadable3.cc" 55 79
+.function_start
+ 14128 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14129 "11000000" // /* MW 3 */
+ 14130 "01100000" // /* MW 2 */
+ 14131 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 55 79 first
+ 14132 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "00011110" // /* MW 3 */
+ 14134 "00111100" // /* MW 2 */
+ 14135 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 56 47 first
+ 14136 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14137 "10011110" // /* MW 3 */
+ 14138 "11101100" // /* MW 2 */
+ 14139 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 58 81 first
+ 14140 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14141 "10011110" // /* MW 3 */
+ 14142 "00010101" // /* MW 2 */
+ 14143 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 57 80 first
+ 14144 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14145 "00011110" // /* MW 3 */
+ 14146 "00000101" // /* MW 2 */
+ 14147 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 54 4 first
+.tail_call
+ 14148 "10000100" // J #11488 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */
+ 14149 "00000000" // /* MW 5 */
+ 14150 "00000000" // /* MW 4 */
+ 14151 "01110000" // /* MW 3 */
+ 14152 "00010110" // /* MW 2 */
+ 14153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14155 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14161 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 14163 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 62 first
+.src_ref 0 "0_0_reloadable3.cc" 64 79
+.function_start
+ 14176 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14177 "11000000" // /* MW 3 */
+ 14178 "01100000" // /* MW 2 */
+ 14179 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 64 79 first
+ 14180 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14181 "00011110" // /* MW 3 */
+ 14182 "00011100" // /* MW 2 */
+ 14183 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 65 79 first
+ 14184 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "10011110" // /* MW 3 */
+ 14186 "00101100" // /* MW 2 */
+ 14187 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 67 81 first
+ 14188 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14189 "10011110" // /* MW 3 */
+ 14190 "11110101" // /* MW 2 */
+ 14191 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 66 47 first
+ 14192 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14193 "00011110" // /* MW 3 */
+ 14194 "00000101" // /* MW 2 */
+ 14195 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 63 4 first
+.tail_call
+ 14196 "10000100" // J #13504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13504 delay_slots=5 */
+ 14197 "00000000" // /* MW 5 */
+ 14198 "00000000" // /* MW 4 */
+ 14199 "01100000" // /* MW 3 */
+ 14200 "00011010" // /* MW 2 */
+ 14201 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14207 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 14211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 14224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14225 "01000001" // /* MW 5 */
+ 14226 "10100000" // /* MW 4 */
+ 14227 "00101111" // /* MW 3 */
+ 14228 "11000000" // /* MW 2 */
+ 14229 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14231 "00011100" // /* MW 3 */
+ 14232 "11000110" // /* MW 2 */
+ 14233 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "00011100" // /* MW 3 */
+ 14236 "11000110" // /* MW 2 */
+ 14237 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14239 "00011100" // /* MW 3 */
+ 14240 "11000110" // /* MW 2 */
+ 14241 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14243 "00011100" // /* MW 3 */
+ 14244 "11000110" // /* MW 2 */
+ 14245 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14247 "00011100" // /* MW 3 */
+ 14248 "11000110" // /* MW 2 */
+ 14249 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14251 "00011100" // /* MW 3 */
+ 14252 "11000110" // /* MW 2 */
+ 14253 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14255 "00011100" // /* MW 3 */
+ 14256 "11000110" // /* MW 2 */
+ 14257 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14259 "00011100" // /* MW 3 */
+ 14260 "11000110" // /* MW 2 */
+ 14261 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14263 "00011100" // /* MW 3 */
+ 14264 "11000110" // /* MW 2 */
+ 14265 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011100" // /* MW 3 */
+ 14268 "11000110" // /* MW 2 */
+ 14269 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011100" // /* MW 3 */
+ 14272 "11000110" // /* MW 2 */
+ 14273 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14275 "00011100" // /* MW 3 */
+ 14276 "11000110" // /* MW 2 */
+ 14277 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14279 "00011100" // /* MW 3 */
+ 14280 "11000110" // /* MW 2 */
+ 14281 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14283 "00011100" // /* MW 3 */
+ 14284 "11000110" // /* MW 2 */
+ 14285 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14287 "00011100" // /* MW 3 */
+ 14288 "11000110" // /* MW 2 */
+ 14289 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14291 "00011100" // /* MW 3 */
+ 14292 "11000110" // /* MW 2 */
+ 14293 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011100" // /* MW 3 */
+ 14296 "11000110" // /* MW 2 */
+ 14297 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "00011100" // /* MW 3 */
+ 14300 "11000110" // /* MW 2 */
+ 14301 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "00011100" // /* MW 3 */
+ 14304 "11000110" // /* MW 2 */
+ 14305 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14307 "00011100" // /* MW 3 */
+ 14308 "11000110" // /* MW 2 */
+ 14309 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14311 "00011100" // /* MW 3 */
+ 14312 "11000110" // /* MW 2 */
+ 14313 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14315 "00011100" // /* MW 3 */
+ 14316 "11000110" // /* MW 2 */
+ 14317 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14319 "00011100" // /* MW 3 */
+ 14320 "11000110" // /* MW 2 */
+ 14321 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14323 "00011100" // /* MW 3 */
+ 14324 "11000110" // /* MW 2 */
+ 14325 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14327 "00011100" // /* MW 3 */
+ 14328 "11000110" // /* MW 2 */
+ 14329 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14331 "00011100" // /* MW 3 */
+ 14332 "11000110" // /* MW 2 */
+ 14333 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14335 "00011100" // /* MW 3 */
+ 14336 "11000110" // /* MW 2 */
+ 14337 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14339 "00011100" // /* MW 3 */
+ 14340 "11000110" // /* MW 2 */
+ 14341 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 14342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14343 "00000000" // /* MW 3 */
+ 14344 "00101000" // /* MW 2 */
+ 14345 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 14346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14347 "00011100" // /* MW 3 */
+ 14348 "11000110" // /* MW 2 */
+ 14349 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14351 "00011100" // /* MW 3 */
+ 14352 "11000110" // /* MW 2 */
+ 14353 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14355 "00011100" // /* MW 3 */
+ 14356 "11000110" // /* MW 2 */
+ 14357 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00011100" // /* MW 3 */
+ 14360 "11000110" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.delay_slot
+ 14362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14363 "10100000" // /* MW 3 */
+ 14364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 14365 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmico
new file mode 100644
index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.cmico
@@ -0,0 +1 @@
++Mdec
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.lst
new file mode 100644
index 0000000000000000000000000000000000000000..dbc805287627de5330bc9a4f15514421ea46bfa2
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.lst
@@ -0,0 +1,4814 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me
+
+// Release: ipp V-2024.06-TGT-241219
+
+.text_segment PM 2352
+.entry_point
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function_start
+ 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1
+ 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15
+ 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2
+ 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15
+ 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16
+ 2390 0x00 0x00 NOPX
+ 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4
+ 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4
+ 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12
+ 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8
+ 2408 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2412 0x00 0x00 NOPX
+ 2414 0x00 0x00 NOPX
+ 2416 0x00 0x00 NOPX
+ 2418 0x00 0x00 NOPX
+ 2420 0x00 0x00 NOPX
+ 2422 0x00 0x00 NOPX
+ 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27
+ 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12]
+ 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 2436 0x00 0x00 NOPX
+ 2438 0x00 0x00 NOPX
+ 2440 0x00 0x00 NOPX
+ 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26
+ 2446 0x10 0x24 0x09 0x18 MOVX r18, #2
+ 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 0x10 0x26 0x05 0x18 MOVX r19, #1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15
+ 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4
+ 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4
+ 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12
+ 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8
+ 2500 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2504 0x00 0x00 NOPX
+ 2506 0x00 0x00 NOPX
+ 2508 0x00 0x00 NOPX
+ 2510 0x00 0x00 NOPX
+ 2512 0x00 0x00 NOPX
+ 2514 0x00 0x00 NOPX
+ 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27
+ 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12]
+ 2524 0x00 0x00 NOPX
+ 2526 0x00 0x00 NOPX
+ 2528 0x00 0x00 NOPX
+ 2530 0x00 0x00 NOPX
+ 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26
+ 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18
+ 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17
+ 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056
+ 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24]
+ 2556 0x00 0x00 NOPX
+ 2558 0x00 0x00 NOPX
+ 2560 0x00 0x00 NOPX
+ 2562 0x00 0x00 NOPX
+ 2564 0x00 0x00 NOPX
+ 2566 0x00 0x00 NOPX
+.no_stack_arguments
+ 2568 0x10 0x30 0x40 0x18 JL p1
+.delay_slot
+ 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 2576 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2578 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2580 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.return_address
+ 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1
+ 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24]
+ 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20]
+ 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16]
+ 2610 0x00 0x00 NOPX
+ 2612 0x00 0x00 NOPX
+ 2614 0x00 0x00 NOPX
+ 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16
+ 2620 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 2624 0x00 0x00 NOPX
+ 2626 0x00 0x00 NOPX
+ 2628 0x00 0x00 NOPX
+ 2630 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15
+ 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 2652 0x00 0x00 NOPX
+ 2654 0x00 0x00 NOPX
+ 2656 0x00 0x00 NOPX
+ 2658 0x00 0x00 NOPX
+ 2660 0x00 0x00 NOPX
+ 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17
+ 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16
+ 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4]
+ 2678 0x00 0x00 NOPX
+ 2680 0x00 0x00 NOPX
+ 2682 0x00 0x00 NOPX
+ 2684 0x00 0x00 NOPX
+ 2686 0x00 0x00 NOPX
+ 2688 0x00 0x00 NOPX
+ 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4]
+ 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+ 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8]
+ 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 2716 0x00 0x00 NOPX
+ 2718 0x00 0x00 NOPX
+ 2720 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0
+.delay_slot
+ 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+.delay_slot
+ 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.delay_slot
+ 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4]
+.delay_slot
+.swstall delay_slot
+ 2740 0x00 0x00 NOPX
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+
+.text_segment PM 2752
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function_start
+ 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1
+ 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9
+ 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64
+ 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16
+ 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15
+ 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0
+ 2812 0x00 0x00 NOPX
+ 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2830 0x00 0x00 NOPX
+ 2832 0x00 0x00 NOPX
+ 2834 0x00 0x00 NOPX
+ 2836 0x00 0x00 NOPX
+ 2838 0x00 0x00 NOPX
+ 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2856 0x00 0x00 NOPX
+ 2858 0x00 0x00 NOPX
+ 2860 0x00 0x00 NOPX
+ 2862 0x00 0x00 NOPX
+ 2864 0x00 0x00 NOPX
+ 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0]
+ 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4]
+ 2882 0x00 0x00 NOPX
+ 2884 0x00 0x00 NOPX
+ 2886 0x00 0x00 NOPX
+ 2888 0x00 0x00 NOPX
+ 2890 0x00 0x00 NOPX
+ 2892 0x09 0x04 0x09 0x98 ST eh0, [p1]
+ 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4]
+ 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3
+ 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1
+ 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5
+ 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2
+ 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0
+ 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6
+ 2924 0x00 0x00 NOPX
+ 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1
+ 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1
+ 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5
+ 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4
+ 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27
+ 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27
+ 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6
+ 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16
+ 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27
+ 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27
+ 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056
+.delay_slot
+ 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4
+.delay_slot
+ 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20
+.delay_slot
+ 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5
+.delay_slot
+ 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4
+.delay_slot
+ 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27
+ 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056
+.delay_slot
+.swstall delay_slot
+ 3006 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3008 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3012 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3014 0x00 0x00 NOPX
+ 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104
+.delay_slot
+ 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12
+.delay_slot
+ 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16
+.delay_slot
+ 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3
+.delay_slot
+ 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15
+.delay_slot
+ 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4
+ 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12
+ 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1
+ 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+ 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60
+ 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51
+ 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55
+ 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68
+ 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112
+ 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105
+ 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49
+ 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63
+ 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1
+ 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7
+ 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18
+ 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1
+ 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87
+ 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1
+ 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18
+ 3310 0x00 0x00 NOPX
+ 3312 0x00 0x00 NOPX
+ 3314 0x00 0x00 NOPX
+ 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18
+ 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6
+ 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5
+ 3328 0x00 0x00 NOPX
+ 3330 0x00 0x00 NOPX
+ 3332 0x00 0x00 NOPX
+ 3334 0x00 0x00 NOPX
+ 3336 0x00 0x00 NOPX
+ 3338 0x00 0x00 NOPX
+ 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14
+ 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27
+ 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2
+ 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536
+ 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18
+ 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680
+ 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16
+ 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8
+ 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17
+ 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19
+ 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26
+ 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22
+ 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30
+ 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25
+ 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568
+.delay_slot
+ 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30
+.delay_slot
+ 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21
+.delay_slot
+ 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17
+.delay_slot
+ 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1
+.delay_slot
+ 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27
+ 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32]
+ 3478 0x00 0x00 NOPX
+ 3480 0x00 0x00 NOPX
+ 3482 0x00 0x00 NOPX
+ 3484 0x00 0x00 NOPX
+ 3486 0x00 0x00 NOPX
+ 3488 0x00 0x00 NOPX
+ 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568
+.delay_slot
+.swstall delay_slot
+ 3496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3500 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3502 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3504 0x00 0x00 NOPX
+ 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64
+ 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27
+ 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6
+ 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0
+ 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27
+ 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30
+ 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28
+ 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27
+ 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64
+ 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0
+ 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18
+ 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6
+ 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31
+ 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18
+ 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144
+ 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27
+ 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288
+ 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26
+ 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27
+ 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0
+ 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24
+ 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28
+ 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256
+ 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27
+ 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542
+ 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1
+ 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1
+ 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0
+ 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20
+ 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1
+ 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0
+ 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0
+ 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0
+ 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14
+ 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26
+ 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56
+ 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7
+ 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3
+ 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18
+ 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4
+ 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6
+ 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3
+ 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0
+ 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0
+ 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1
+ 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3
+ 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1
+ 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26
+ 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr
+.delay_slot
+ 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20
+.delay_slot
+ 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4
+.delay_slot
+ 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4
+.delay_slot
+ 4168 0x0a 0x04 0x51 0x98 ST r2, [p2]
+.delay_slot
+ 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+
+.text_segment PM 4192
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function_start
+ 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2
+ 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4
+ 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032
+ 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336
+ 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384
+ 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0
+ 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 4258 0x00 0x00 NOPX
+ 4260 0x00 0x00 NOPX
+ 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1
+ 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4400 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0
+ 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+ 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0
+ 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr
+.delay_slot
+ 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26]
+.delay_slot
+ 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+.delay_slot
+.swstall delay_slot
+ 4456 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4458 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4460 0x00 0x00 NOPX
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+
+.text_segment PM 4464
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function_start
+ 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128
+ 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3
+ 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28
+ 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216
+ 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431
+ 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20
+ 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60
+ 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64
+ 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28]
+ 4562 0x00 0x00 NOPX
+ 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18
+ 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28
+ 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17
+ 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6
+ 4584 0x00 0x00 NOPX
+ 4586 0x00 0x00 NOPX
+ 4588 0x00 0x00 NOPX
+ 4590 0x00 0x00 NOPX
+ 4592 0x00 0x00 NOPX
+ 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30
+ 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27
+ 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28
+ 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30
+ 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17
+ 4650 0x00 0x00 NOPX
+ 4652 0x00 0x00 NOPX
+ 4654 0x00 0x00 NOPX
+ 4656 0x00 0x00 NOPX
+ 4658 0x00 0x00 NOPX
+ 4660 0x00 0x00 NOPX
+ 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4
+ 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736
+.delay_slot
+ 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20
+.delay_slot
+ 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31
+.delay_slot
+ 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22
+.delay_slot
+ 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23
+.delay_slot
+ 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22
+ 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736
+.delay_slot
+ 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24]
+.delay_slot
+.swstall delay_slot
+ 4704 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4706 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4708 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4710 0x00 0x00 NOPX
+ 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784
+.delay_slot
+ 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25
+.delay_slot
+ 4726 0x10 0x26 0x05 0x18 MOVX r19, #1
+.delay_slot
+.swstall delay_slot
+ 4730 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4732 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4734 0x00 0x00 NOPX
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25
+ 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21
+ 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27
+ 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30
+ 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16
+ 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27
+ 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20
+ 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20
+ 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+ 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832
+.delay_slot
+ 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4
+.delay_slot
+.swstall delay_slot
+ 4800 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4802 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4804 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4806 0x00 0x00 NOPX
+ 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215
+ 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196
+ 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4
+ 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4
+ 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1]
+ 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18
+ 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4]
+ 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18
+ 4902 0x00 0x00 NOPX
+ 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19
+ 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17
+ 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024
+.delay_slot
+ 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22
+.delay_slot
+ 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22
+.delay_slot
+ 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20
+.delay_slot
+ 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0
+.delay_slot
+ 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30
+ 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032
+ 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1
+ 4958 0x00 0x00 NOPX
+ 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144
+ 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20
+ 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0
+ 4998 0x00 0x00 NOPX
+ 5000 0x00 0x00 NOPX
+ 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7]
+ 5006 0x00 0x00 NOPX
+ 5008 0x00 0x00 NOPX
+ 5010 0x00 0x00 NOPX
+ 5012 0x00 0x00 NOPX
+ 5014 0x00 0x00 NOPX
+ 5016 0x00 0x00 NOPX
+ 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48]
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3]
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18
+.delay_slot
+ 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30
+.delay_slot
+ 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17
+.delay_slot
+ 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781
+.delay_slot
+ 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV
+.return_address
+ 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44]
+.no_stack_arguments
+ 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.swstall delay_slot
+ 5114 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5116 0x00 0x00 NOPX
+.delay_slot
+ 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32
+.delay_slot
+ 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13
+.delay_slot
+ 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX
+.return_address
+ 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116
+ 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12
+ 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4
+ 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4
+ 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4
+ 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4
+ 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4
+ 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4
+ 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4
+ 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4
+ 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4
+ 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4
+ 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4
+ 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4
+ 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4
+ 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4
+ 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4
+ 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4
+ 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4
+ 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4
+ 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4
+ 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6]
+ 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48]
+ 5236 0x04 0x04 0x56 0x98 LDA r2, [p4]
+ 5240 0x00 0x00 NOPX
+ 5242 0x00 0x00 NOPX
+ 5244 0x00 0x00 NOPX
+ 5246 0x00 0x00 NOPX
+ 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18
+ 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176
+.delay_slot
+ 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.delay_slot
+ 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2]
+.delay_slot
+ 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20
+.delay_slot
+.swstall delay_slot
+ 5272 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5274 0x00 0x00 NOPX
+ 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132
+ 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60
+ 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19
+ 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780
+ 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0
+ 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4
+ 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5
+ 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8
+ 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4
+ 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4
+ 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28
+ 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4
+ 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0
+ 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4]
+ 5400 0x04 0x04 0x96 0x98 LDA r4, [p4]
+ 5404 0x19 0xda 0x00 0xf8 MOV r7, m5
+ 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6
+ 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5
+ 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6
+ 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64
+ 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64
+ 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.loop_nesting 1
+ 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14
+ 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12
+ 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6
+ 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2
+ 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0
+ 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760
+ 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712
+ 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24]
+ 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+ 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.loop_nesting 1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5
+ 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20
+ 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20
+ 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20
+ 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5
+ 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1
+ 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29
+ 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30
+ 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0
+ 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3
+ 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096
+.delay_slot
+ 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1
+.delay_slot
+ 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7
+.delay_slot
+ 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5
+.delay_slot
+ 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2
+.delay_slot
+ 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5
+ 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9
+ 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9
+ 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128
+.delay_slot
+ 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9
+.delay_slot
+ 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9
+.delay_slot
+ 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9
+.delay_slot
+ 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+ 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7]
+ 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64]
+ 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1]
+ 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64]
+ 6112 0x08 0x06 0x13 0x18 VST x8, [p0]
+ 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64]
+ 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+ 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0
+.delay_slot
+ 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2
+.delay_slot
+ 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19
+.delay_slot
+ 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7
+.delay_slot
+ 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7
+.delay_slot
+.swstall delay_slot
+ 6158 0x00 0x00 NOPX
+.loop_nesting 0
+ 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832
+.delay_slot
+.swstall delay_slot
+ 6166 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6168 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6170 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6172 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6174 0x00 0x00 NOPX
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+ 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14
+ 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2
+ 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23
+ 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30
+ 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29
+ 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21
+ 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320
+ 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14
+ 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4
+ 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4
+ 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28
+ 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4
+ 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6
+ 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4]
+ 6284 0x03 0x07 0x96 0x98 LDA r28, [p3]
+ 6288 0x00 0x00 NOPX
+ 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4
+ 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3
+ 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6
+ 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64
+ 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64
+ 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.loop_nesting 1
+ 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496
+ 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544
+ 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3
+ 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30
+ 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25]
+ 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0]
+ 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25]
+ 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]
+ 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25]
+ 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25]
+ 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25]
+ 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22
+ 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23
+ 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9
+ 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9
+ 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV
+.loop_nesting 1
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19
+ 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22
+ 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23
+ 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+ 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+ 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19
+ 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19
+ 6646 0x00 0x00 NOPX
+ 6648 0x00 0x00 NOPX
+ 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0
+ 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1
+ 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768
+.delay_slot
+ 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0
+.delay_slot
+ 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3
+.delay_slot
+ 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3
+.delay_slot
+ 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2
+.delay_slot
+ 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2
+ 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9
+ 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9
+ 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800
+.delay_slot
+ 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9
+.delay_slot
+ 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9
+.delay_slot
+ 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9
+.delay_slot
+ 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+ 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3]
+ 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64]
+ 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5]
+ 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64]
+ 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3]
+ 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64]
+ 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+ 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2
+.delay_slot
+ 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4
+.delay_slot
+.swstall delay_slot
+ 6810 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6812 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6814 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20]
+ 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16]
+ 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12]
+ 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24]
+ 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+ 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4]
+ 6856 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 6866 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6868 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6870 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6872 0x00 0x00 NOPX
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+
+.text_segment PM 6880
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992
+ 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15
+ 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16]
+ 6910 0x00 0x00 NOPX
+ 6912 0x00 0x00 NOPX
+ 6914 0x00 0x00 NOPX
+ 6916 0x00 0x00 NOPX
+ 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088
+.delay_slot
+ 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4]
+.delay_slot
+ 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20]
+.delay_slot
+ 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 6940 0x00 0x07 0xc7 0xab 0x80 0x44 MOVXM r15, #509376
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 6984 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0
+.delay_slot
+ 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2
+.return_address
+ 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11
+ 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996
+ 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024
+ 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 7036 0x00 0x00 NOPX
+ 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2]
+ 7042 0x00 0x00 NOPX
+ 7044 0x00 0x00 NOPX
+ 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6
+ 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16
+ 7058 0x00 0x00 NOPX
+ 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 7064 0x00 0x00 NOPX
+ 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16
+ 7070 0x00 0x00 NOPX
+ 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000
+ 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004
+ 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992
+ 7118 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 7122 0x00 0x00 NOPX
+ 7124 0x00 0x00 NOPX
+ 7126 0x00 0x00 NOPX
+ 7128 0x00 0x00 NOPX
+ 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216
+.delay_slot
+ 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1
+.delay_slot
+ 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1
+.delay_slot
+ 7150 0x0e 0x06 0x71 0x98 ST r19, [p6]
+.delay_slot
+ 7154 0x0f 0x06 0x31 0x98 ST r17, [p7]
+ 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 7200 0x00 0x00 NOPX
+ 7202 0x00 0x00 NOPX
+ 7204 0x00 0x00 NOPX
+ 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.no_stack_arguments
+ 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464
+.delay_slot
+ 7222 0x00 0x07 0xc6 0xcb 0x80 0x44 MOVXM p3, #509376
+.delay_slot
+.swstall delay_slot
+ 7228 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7232 0x00 0x00 NOPX
+.delay_slot
+ 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV
+.return_address
+ 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996
+ 7258 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16]
+ 7266 0x00 0x00 NOPX
+ 7268 0x00 0x00 NOPX
+ 7270 0x00 0x00 NOPX
+ 7272 0x00 0x00 NOPX
+ 7274 0x00 0x00 NOPX
+ 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360
+.delay_slot
+ 7286 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 7290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7292 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7294 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7296 0x00 0x00 NOPX
+ 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20
+ 7304 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7308 0x00 0x00 NOPX
+ 7310 0x00 0x00 NOPX
+ 7312 0x00 0x00 NOPX
+ 7314 0x00 0x00 NOPX
+ 7316 0x00 0x00 NOPX
+ 7318 0x00 0x00 NOPX
+ 7320 0x14 0x51 0x08 0x18 REL r17, r16
+ 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6]
+ 7330 0x00 0x00 NOPX
+ 7332 0x00 0x00 NOPX
+ 7334 0x00 0x00 NOPX
+ 7336 0x00 0x00 NOPX
+ 7338 0x00 0x00 NOPX
+ 7340 0x00 0x00 NOPX
+ 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+ 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+ 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024
+ 7370 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8]
+ 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 7382 0x00 0x00 NOPX
+ 7384 0x00 0x00 NOPX
+ 7386 0x00 0x00 NOPX
+ 7388 0x00 0x00 NOPX
+ 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424
+.delay_slot
+.swstall delay_slot
+ 7400 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7406 0x00 0x00 NOPX
+.delay_slot
+ 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0
+ 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4]
+ 7428 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7442 0x00 0x00 NOPX
+.delay_slot
+ 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 7456
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 7470 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 7478 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7488
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+ 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4]
+ 7506 0x00 0x00 NOPX
+ 7508 0x00 0x00 NOPX
+ 7510 0x00 0x00 NOPX
+ 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7520 0x00 0x00 NOPX
+ 7522 0x00 0x00 NOPX
+ 7524 0x00 0x00 NOPX
+ 7526 0x00 0x00 NOPX
+ 7528 0x00 0x00 NOPX
+ 7530 0x00 0x00 NOPX
+ 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 7540 0x00 0x00 NOPX
+ 7542 0x00 0x00 NOPX
+ 7544 0x00 0x00 NOPX
+ 7546 0x00 0x00 NOPX
+ 7548 0x00 0x00 NOPX
+ 7550 0x00 0x00 NOPX
+ 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 7560 0x00 0x00 NOPX
+ 7562 0x00 0x00 NOPX
+.no_stack_arguments
+ 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456
+.delay_slot
+.swstall delay_slot
+ 7570 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7572 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7574 0x00 0x00 NOPX
+.delay_slot
+ 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0
+.return_address
+ 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16
+ 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3
+ 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128
+ 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0]
+ 7618 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 0x10 0x22 0x05 0x18 MOVX r17, #1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27
+.delay_slot
+.swstall delay_slot
+ 7648 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7664
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0
+ 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12
+ 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0
+ 7684 0x00 0x00 NOPX
+ 7686 0x00 0x00 NOPX
+ 7688 0x00 0x00 NOPX
+ 7690 0x00 0x00 NOPX
+ 7692 0x00 0x00 NOPX
+ 7694 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0
+.delay_slot
+ 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1
+.delay_slot
+ 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2
+.delay_slot
+ 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1
+.delay_slot
+ 7716 0x08 0x04 0x51 0x98 ST r2, [p0]
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7728
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+.no_stack_arguments
+ 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488
+.delay_slot
+ 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 7752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7
+.delay_slot
+ 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7788 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7792
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20
+ 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3
+ 7804 0x00 0x00 NOPX
+ 7806 0x00 0x00 NOPX
+ 7808 0x00 0x00 NOPX
+ 7810 0x00 0x00 NOPX
+ 7812 0x00 0x00 NOPX
+ 7814 0x00 0x00 NOPX
+ 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872
+.delay_slot
+ 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6
+.delay_slot
+ 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2
+.delay_slot
+.swstall delay_slot
+ 7830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7834 0x00 0x00 NOPX
+ 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0]
+ 7840 0x00 0x00 NOPX
+ 7842 0x00 0x00 NOPX
+ 7844 0x00 0x00 NOPX
+ 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904
+.delay_slot
+.swstall delay_slot
+ 7852 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7854 0x00 0x00 NOPX
+.delay_slot
+ 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+.swstall delay_slot
+ 7860 0x00 0x00 NOPX
+.delay_slot
+ 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+ 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1]
+ 7876 0x00 0x00 NOPX
+ 7878 0x00 0x00 NOPX
+ 7880 0x00 0x00 NOPX
+ 7882 0x00 0x00 NOPX
+ 7884 0x00 0x00 NOPX
+ 7886 0x00 0x00 NOPX
+ 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+ 7892 0x00 0x00 NOPX
+ 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+ 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3
+ 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016
+ 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048
+ 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+ 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 7940 0x00 0x00 NOPX
+ 7942 0x00 0x00 NOPX
+ 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+ 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1
+ 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8064 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr
+.delay_slot
+.swstall delay_slot
+ 8092 0x00 0x00 NOPX
+.delay_slot
+ 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8098 0x00 0x00 NOPX
+.delay_slot
+ 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8104 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8112
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2
+ 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12
+ 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0
+ 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp
+ 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128
+ 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0]
+ 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0]
+ 8158 0x00 0x00 NOPX
+ 8160 0x00 0x00 NOPX
+.no_stack_arguments
+ 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792
+.delay_slot
+ 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0
+.delay_slot
+.swstall delay_slot
+ 8172 0x00 0x00 NOPX
+.delay_slot
+ 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27
+.delay_slot
+ 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18
+.delay_slot
+ 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16
+.return_address
+ 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 8196 0x00 0x00 NOPX
+ 8198 0x00 0x00 NOPX
+ 8200 0x00 0x00 NOPX
+ 8202 0x00 0x00 NOPX
+ 8204 0x00 0x00 NOPX
+ 8206 0x00 0x00 NOPX
+ 8208 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 8218 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8220 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8222 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8224 0x00 0x00 NOPX
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8240
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 8280 0x00 0x00 NOPX
+ 8282 0x00 0x00 NOPX
+ 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448
+.delay_slot
+ 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 8348 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120
+ 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120
+ 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 8404 0x00 0x00 NOPX
+ 8406 0x00 0x00 NOPX
+ 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464
+.delay_slot
+ 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 8420 0x00 0x00 NOPX
+.delay_slot
+ 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 8494 0x00 0x00 NOPX
+ 8496 0x00 0x00 NOPX
+ 8498 0x00 0x00 NOPX
+ 8500 0x00 0x00 NOPX
+ 8502 0x00 0x00 NOPX
+ 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 8508 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 8516 0x00 0x00 NOPX
+ 8518 0x00 0x00 NOPX
+ 8520 0x00 0x00 NOPX
+ 8522 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 8532 0x00 0x00 NOPX
+ 8534 0x00 0x00 NOPX
+ 8536 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 8546 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 8554 0x00 0x00 NOPX
+.no_stack_arguments
+ 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112
+.delay_slot
+.swstall delay_slot
+ 8562 0x00 0x00 NOPX
+.delay_slot
+ 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 8568 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 8602 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 8606 0x00 0x00 NOPX
+ 8608 0x00 0x00 NOPX
+ 8610 0x00 0x00 NOPX
+ 8612 0x00 0x00 NOPX
+ 8614 0x00 0x00 NOPX
+ 8616 0x14 0x51 0x08 0x18 REL r17, r16
+ 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 8630 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 8634 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 8638 0x00 0x00 NOPX
+ 8640 0x00 0x00 NOPX
+ 8642 0x00 0x00 NOPX
+ 8644 0x00 0x00 NOPX
+ 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688
+.delay_slot
+.swstall delay_slot
+ 8664 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8666 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8670 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8672 0x00 0x00 NOPX
+ 8674 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 8696 0x00 0x00 NOPX
+ 8698 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 8722 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8724 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8726 0x00 0x00 NOPX
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 8736
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function_start
+ 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8740 0x00 0x00 NOPX
+ 8742 0x00 0x00 NOPX
+ 8744 0x00 0x00 NOPX
+ 8746 0x00 0x00 NOPX
+ 8748 0x00 0x00 NOPX
+ 8750 0x00 0x00 NOPX
+ 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8760 0x00 0x00 NOPX
+ 8762 0x00 0x00 NOPX
+ 8764 0x00 0x00 NOPX
+ 8766 0x00 0x00 NOPX
+ 8768 0x00 0x00 NOPX
+ 8770 0x00 0x00 NOPX
+ 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24
+ 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1]
+ 8784 0x00 0x00 NOPX
+ 8786 0x00 0x00 NOPX
+ 8788 0x00 0x00 NOPX
+ 8790 0x00 0x00 NOPX
+ 8792 0x00 0x00 NOPX
+ 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24
+ 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+ 8802 0x00 0x00 NOPX
+ 8804 0x00 0x00 NOPX
+ 8806 0x00 0x00 NOPX
+ 8808 0x00 0x00 NOPX
+ 8810 0x00 0x00 NOPX
+ 8812 0x00 0x00 NOPX
+ 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4]
+ 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2]
+ 8822 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+ 8826 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8828 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8834 0x00 0x00 NOPX
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 8848
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function_start
+ 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976
+ 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024
+ 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2
+ 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32
+ 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+ 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1
+ 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0
+.delay_slot
+ 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.delay_slot
+ 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+.delay_slot
+ 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1
+.delay_slot
+ 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64
+.delay_slot
+ 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+
+.text_segment PM 9104
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 9144 0x00 0x00 NOPX
+ 9146 0x00 0x00 NOPX
+ 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312
+.delay_slot
+ 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 9174 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 9212 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xc0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509312
+ 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xc0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509312
+ 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012
+ 9268 0x00 0x00 NOPX
+ 9270 0x00 0x00 NOPX
+ 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328
+.delay_slot
+ 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 9284 0x00 0x00 NOPX
+.delay_slot
+ 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 9358 0x00 0x00 NOPX
+ 9360 0x00 0x00 NOPX
+ 9362 0x00 0x00 NOPX
+ 9364 0x00 0x00 NOPX
+ 9366 0x00 0x00 NOPX
+ 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 9372 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 9380 0x00 0x00 NOPX
+ 9382 0x00 0x00 NOPX
+ 9384 0x00 0x00 NOPX
+ 9386 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 9396 0x00 0x00 NOPX
+ 9398 0x00 0x00 NOPX
+ 9400 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 9410 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 9418 0x00 0x00 NOPX
+.no_stack_arguments
+ 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848
+.delay_slot
+.swstall delay_slot
+ 9426 0x00 0x00 NOPX
+.delay_slot
+ 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 9432 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 9466 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 9470 0x00 0x00 NOPX
+ 9472 0x00 0x00 NOPX
+ 9474 0x00 0x00 NOPX
+ 9476 0x00 0x00 NOPX
+ 9478 0x00 0x00 NOPX
+ 9480 0x14 0x51 0x08 0x18 REL r17, r16
+ 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 9494 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 9498 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 9502 0x00 0x00 NOPX
+ 9504 0x00 0x00 NOPX
+ 9506 0x00 0x00 NOPX
+ 9508 0x00 0x00 NOPX
+ 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552
+.delay_slot
+.swstall delay_slot
+ 9528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9534 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9536 0x00 0x00 NOPX
+ 9538 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 9560 0x00 0x00 NOPX
+ 9562 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 9586 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9588 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9590 0x00 0x00 NOPX
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 9600
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0
+ 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128
+ 9614 0x00 0x00 NOPX
+ 9616 0x00 0x00 NOPX
+ 9618 0x00 0x00 NOPX
+ 9620 0x00 0x00 NOPX
+ 9622 0x00 0x00 NOPX
+ 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 9632 0x00 0x00 NOPX
+ 9634 0x00 0x00 NOPX
+ 9636 0x00 0x00 NOPX
+ 9638 0x00 0x00 NOPX
+ 9640 0x00 0x00 NOPX
+ 9642 0x00 0x00 NOPX
+ 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 9652 0x00 0x00 NOPX
+ 9654 0x00 0x00 NOPX
+ 9656 0x00 0x00 NOPX
+ 9658 0x00 0x00 NOPX
+ 9660 0x00 0x00 NOPX
+ 9662 0x00 0x00 NOPX
+ 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4]
+ 9672 0x00 0x00 NOPX
+ 9674 0x00 0x00 NOPX
+ 9676 0x00 0x00 NOPX
+ 9678 0x00 0x00 NOPX
+ 9680 0x00 0x00 NOPX
+ 9682 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27
+.delay_slot
+.swstall delay_slot
+ 9714 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9728
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+.no_stack_arguments
+ 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600
+.delay_slot
+ 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4]
+.delay_slot
+ 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+ 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8]
+ 9764 0x00 0x00 NOPX
+ 9766 0x00 0x00 NOPX
+ 9768 0x00 0x00 NOPX
+ 9770 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 0x10 0x20 0x09 0x18 MOVX r16, #2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9808
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function_start
+ 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3
+ 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14
+ 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2
+ 9820 0x03 0x04 0x96 0x98 LDA r4, [p3]
+ 9824 0x00 0x00 NOPX
+ 9826 0x00 0x00 NOPX
+ 9828 0x00 0x00 NOPX
+ 9830 0x00 0x00 NOPX
+ 9832 0x10 0x06 0x09 0x18 MOVX r3, #2
+ 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4
+ 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000
+.delay_slot
+ 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+ 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7
+.delay_slot
+ 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp
+.delay_slot
+ 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7
+.delay_slot
+ 9874 0x0f 0x04 0x13 0x18 VST x0, [p7]
+ 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2
+ 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64
+ 9894 0x00 0x00 NOPX
+ 9896 0x00 0x00 NOPX
+ 9898 0x00 0x00 NOPX
+ 9900 0x00 0x00 NOPX
+ 9902 0x00 0x00 NOPX
+ 9904 0x00 0x00 NOPX
+ 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952
+.delay_slot
+ 9912 0x18 0x00 0x00 0xb8 MOV m0, #0
+.delay_slot
+ 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 9922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9926 0x00 0x00 NOPX
+ 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968
+.delay_slot
+.swstall delay_slot
+ 9938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9944 0x00 0x00 NOPX
+.delay_slot
+ 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+ 9952 0x19 0x00 0x80 0xb8 MOV m1, #64
+ 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128
+.delay_slot
+ 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9982 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9984 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9986 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+ 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3
+ 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4
+ 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048
+.delay_slot
+ 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216
+.delay_slot
+ 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 10026 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10028 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10030 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 0x18 0x00 0x80 0xb8 MOV m0, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 0x19 0x00 0x00 0xb8 MOV m1, #0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0
+ 10096 0x00 0x00 NOPX
+ 10098 0x00 0x00 NOPX
+ 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0]
+ 10104 0x00 0x00 NOPX
+ 10106 0x00 0x00 NOPX
+ 10108 0x00 0x00 NOPX
+ 10110 0x00 0x00 NOPX
+ 10112 0x00 0x00 NOPX
+ 10114 0x00 0x00 NOPX
+ 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0
+ 10120 0x00 0x00 NOPX
+ 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+ 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1
+.delay_slot
+.swstall delay_slot
+ 10334 0x00 0x00 NOPX
+.delay_slot
+ 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 10340 0x00 0x00 NOPX
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+
+.text_segment PM 10352
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function_start
+ 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr
+ 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1
+ 10368 0x00 0x00 NOPX
+ 10370 0x00 0x00 NOPX
+ 10372 0x00 0x00 NOPX
+ 10374 0x00 0x00 NOPX
+ 10376 0x00 0x00 NOPX
+ 10378 0x00 0x00 NOPX
+ 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448
+.delay_slot
+ 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+.delay_slot
+ 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp
+.delay_slot
+ 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64
+.delay_slot
+ 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1]
+.delay_slot
+.swstall delay_slot
+ 10404 0x00 0x00 NOPX
+.no_stack_arguments
+ 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+.swstall delay_slot
+ 10412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10416 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10418 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.return_address
+ 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+.swstall delay_slot
+ 10438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10444 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10446 0x00 0x00 NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.no_stack_arguments
+ 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+ 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0
+.delay_slot
+.swstall delay_slot
+ 10462 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.return_address
+ 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0
+ 10484 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10494 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10500 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 10512
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 10552 0x00 0x00 NOPX
+ 10554 0x00 0x00 NOPX
+ 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720
+.delay_slot
+ 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 10620 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184
+ 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184
+ 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 10676 0x00 0x00 NOPX
+ 10678 0x00 0x00 NOPX
+ 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736
+.delay_slot
+ 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 10692 0x00 0x00 NOPX
+.delay_slot
+ 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 10766 0x00 0x00 NOPX
+ 10768 0x00 0x00 NOPX
+ 10770 0x00 0x00 NOPX
+ 10772 0x00 0x00 NOPX
+ 10774 0x00 0x00 NOPX
+ 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 10780 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 10788 0x00 0x00 NOPX
+ 10790 0x00 0x00 NOPX
+ 10792 0x00 0x00 NOPX
+ 10794 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 10804 0x00 0x00 NOPX
+ 10806 0x00 0x00 NOPX
+ 10808 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 10818 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 10826 0x00 0x00 NOPX
+.no_stack_arguments
+ 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352
+.delay_slot
+.swstall delay_slot
+ 10834 0x00 0x00 NOPX
+.delay_slot
+ 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 10840 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 10874 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 10878 0x00 0x00 NOPX
+ 10880 0x00 0x00 NOPX
+ 10882 0x00 0x00 NOPX
+ 10884 0x00 0x00 NOPX
+ 10886 0x00 0x00 NOPX
+ 10888 0x14 0x51 0x08 0x18 REL r17, r16
+ 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 10902 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 10906 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 10910 0x00 0x00 NOPX
+ 10912 0x00 0x00 NOPX
+ 10914 0x00 0x00 NOPX
+ 10916 0x00 0x00 NOPX
+ 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960
+.delay_slot
+.swstall delay_slot
+ 10936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10944 0x00 0x00 NOPX
+ 10946 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 10968 0x00 0x00 NOPX
+ 10970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10994 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10996 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10998 0x00 0x00 NOPX
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 11008
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function_start
+ 11008 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 11014 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 11018 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 11022 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 11026 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 11030 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11040
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 11040 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11044 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11050 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 11054 0x00 0x00 NOPX
+ 11056 0x00 0x00 NOPX
+ 11058 0x00 0x00 NOPX
+ 11060 0x00 0x00 NOPX
+ 11062 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11066 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11070 0x00 0x00 NOPX
+ 11072 0x00 0x00 NOPX
+ 11074 0x00 0x00 NOPX
+ 11076 0x00 0x00 NOPX
+ 11078 0x00 0x00 NOPX
+ 11080 0x00 0x00 NOPX
+ 11082 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11086 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 11090 0x00 0x00 NOPX
+ 11092 0x00 0x00 NOPX
+ 11094 0x00 0x00 NOPX
+ 11096 0x00 0x00 NOPX
+ 11098 0x00 0x00 NOPX
+ 11100 0x00 0x00 NOPX
+ 11102 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11106 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 11110 0x00 0x00 NOPX
+ 11112 0x00 0x00 NOPX
+.no_stack_arguments
+ 11114 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008
+.delay_slot
+ 11120 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 11124 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11126 0x00 0x00 NOPX
+.delay_slot
+ 11128 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 11132 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.return_address
+ 11136 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 11140 0x00 0x00 NOPX
+ 11142 0x00 0x00 NOPX
+ 11144 0x00 0x00 NOPX
+ 11146 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11148 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11150 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11154 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11158 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11160 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11162 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11164 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11168 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 11184
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function_start
+ 11184 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0x30 0x10 0xba MOVA m0, #32; MOVXM ls, #11360
+ 11194 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0x38 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11376
+ 11204 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828
+ 11214 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032
+ 11224 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4]
+ 11228 0x00 0x00 NOPX
+ 11230 0x00 0x00 NOPX
+ 11232 0x00 0x00 NOPX
+ 11234 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1
+ 11238 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11244 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11250 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11258 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11264 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11270 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11274 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11280 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11286 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11296 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11306 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11316 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11326 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11336 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11346 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11360 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11376 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11392 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11400 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11408 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11432 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11440 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11448 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11452 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11458 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11462 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 11466 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+ 11470 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 11474 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11488
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function_start
+ 11488 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 11494 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID
+ 11500 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11506 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2
+ 11516 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15
+ 11524 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4]
+ 11528 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16]
+ 11532 0x00 0x00 NOPX
+ 11534 0x80 0x16 0xd0 0x40 0x01 0x84 JNZ r16, #11680
+.delay_slot
+ 11540 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 11544 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+.delay_slot
+ 11550 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7
+.delay_slot
+ 11558 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 11562 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509248
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11572 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11582 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11594 0x00 0x15 0x90 0x00 0x01 0x04 JL #11040
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11600 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11604 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 11608 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 11612 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.return_address
+ 11616 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008
+ 11626 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012
+ 11636 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020
+ 11646 0x00 0x00 NOPX
+ 11648 0x00 0x00 NOPX
+ 11650 0x00 0x00 NOPX
+ 11652 0x00 0x16 0xd8 0x00 0x00 0x84 J #11696
+.delay_slot
+ 11658 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+ 11664 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 11668 0x0a 0x06 0x51 0x98 ST r18, [p2]
+.delay_slot
+ 11672 0x0b 0x06 0x11 0x98 ST r16, [p3]
+.delay_slot
+ 11676 0x09 0x06 0x11 0x98 ST r16, [p1]
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+ 11680 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012
+ 11686 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+ 11696 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12
+ 11700 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992
+ 11710 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4
+ 11714 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4
+ 11718 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 11722 0x00 0x46 0x76 0x98 LDA r19, [p0, #16]
+ 11726 0x00 0x00 NOPX
+ 11728 0x00 0x00 NOPX
+ 11730 0x00 0x00 NOPX
+ 11732 0x00 0x00 NOPX
+ 11734 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 11738 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1
+ 11744 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 11748 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 11752 0x00 0x00 NOPX
+ 11754 0x00 0x00 NOPX
+ 11756 0x00 0x00 NOPX
+ 11758 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 11762 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12
+ 11766 0x00 0x00 NOPX
+ 11768 0x00 0x00 NOPX
+ 11770 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 11774 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 11778 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+ 11782 0x02 0x56 0x76 0x98 LDA r19, [p2, #20]
+ 11786 0x00 0x00 NOPX
+ 11788 0x00 0x00 NOPX
+ 11790 0x00 0x00 NOPX
+ 11792 0x00 0x00 NOPX
+ 11794 0x00 0x00 NOPX
+ 11796 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+ 11800 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 11804 0x00 0x00 NOPX
+ 11806 0x00 0x00 NOPX
+ 11808 0x00 0x00 NOPX
+ 11810 0x00 0x00 NOPX
+ 11812 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 11816 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6
+ 11826 0x00 0x00 NOPX
+ 11828 0x00 0x00 NOPX
+ 11830 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20]
+ 11834 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20]
+ 11840 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 11846 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 11850 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11854 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11858 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11860 0x04 0x06 0x76 0x98 LDA r19, [p4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11864 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11866 0x00 0x15 0xd8 0x00 0x01 0x04 JL #11184
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11872 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+.delay_slot
+ 11876 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16
+.delay_slot
+ 11880 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16
+.delay_slot
+ 11884 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16
+.delay_slot
+ 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV
+.return_address
+ 11904 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15
+ 11914 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024
+ 11920 0x00 0x00 NOPX
+ 11922 0x00 0x00 NOPX
+ 11924 0x00 0x00 NOPX
+ 11926 0x00 0x00 NOPX
+ 11928 0x00 0x00 NOPX
+ 11930 0x14 0x51 0x08 0x18 REL r17, r16
+ 11934 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4]
+ 11938 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20]
+ 11942 0x00 0x00 NOPX
+ 11944 0x00 0x00 NOPX
+ 11946 0x00 0x00 NOPX
+ 11948 0x00 0x00 NOPX
+ 11950 0x00 0x00 NOPX
+ 11952 0x14 0x23 0x11 0x98 SUB r17, r16, r17
+ 11956 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4]
+ 11962 0x00 0x00 NOPX
+ 11964 0x00 0x00 NOPX
+ 11966 0x00 0x00 NOPX
+ 11968 0x00 0x00 NOPX
+ 11970 0x00 0x00 NOPX
+ 11972 0x00 0x00 NOPX
+ 11974 0x14 0x51 0x08 0x18 REL r17, r16
+ 11978 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992
+ 11988 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 11992 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 11996 0x00 0x00 NOPX
+ 11998 0x00 0x00 NOPX
+ 12000 0x00 0x00 NOPX
+ 12002 0x00 0x00 NOPX
+ 12004 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 12008 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 12012 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 12016 0x80 0x17 0x88 0x40 0x01 0x84 JNZ r16, #12048
+.delay_slot
+.swstall delay_slot
+ 12022 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12024 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12026 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12028 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12030 0x00 0x00 NOPX
+ 12032 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 12036 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+ 12048 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16]
+ 12052 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+ 12056 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12060 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12062 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12066 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12068 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12070 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12074 0x0e 0x8e 0x0b 0x18 MOVS p6, r14
+.delay_slot
+ 12078 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 12084 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12086 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12088 0x00 0x00 NOPX
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+
+.text_segment PM 12096
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function_start
+ 12096 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xc0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509824
+ 12106 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0
+ 12116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 12122 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xc0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509824
+ 12132 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 12136 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8]
+ 12140 0x00 0x00 NOPX
+ 12142 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 12146 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 12150 0x00 0x04 0x2e 0x98 LDA el0, [p0]
+ 12154 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4]
+ 12158 0x00 0x00 NOPX
+ 12160 0x00 0x00 NOPX
+ 12162 0x00 0x00 NOPX
+ 12164 0x00 0x00 NOPX
+ 12166 0x00 0x00 NOPX
+ 12168 0x09 0x04 0x29 0x98 ST el0, [p1]
+ 12172 0x09 0x14 0x09 0x98 ST eh0, [p1, #4]
+ 12176 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5
+ 12180 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2
+ 12184 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2
+ 12188 0x00 0x00 NOPX
+ 12190 0x00 0x00 NOPX
+ 12192 0x00 0x00 NOPX
+ 12194 0x00 0x00 NOPX
+.no_stack_arguments
+ 12196 0x00 0x1b 0xc8 0x00 0x01 0x04 JL #14224
+.delay_slot
+ 12202 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18
+.delay_slot
+ 12208 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16
+.delay_slot
+ 12214 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24
+.delay_slot
+ 12220 0x16 0x22 0xf1 0x98 SUB r17, r24, r15
+.delay_slot
+ 12224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV
+.return_address
+ 12240 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0
+ 12246 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2
+ 12252 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2
+ 12256 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20]
+ 12260 0x00 0x00 NOPX
+ 12262 0x00 0x00 NOPX
+ 12264 0x00 0x00 NOPX
+ 12266 0x13 0xe9 0x46 0x98 XOR r20, r15, r20
+ 12270 0x15 0x37 0x0a 0x98 LT r27, r20, r16
+ 12274 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19
+.no_stack_arguments
+ 12280 0xfc 0x46 0xb0 0x00 0x06 0xf2 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #14224
+.delay_slot
+ 12290 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27
+.delay_slot
+ 12294 0x14 0x77 0x0a 0x98 LT r27, r17, r16
+.delay_slot
+ 12298 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+.delay_slot
+ 12302 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20
+.delay_slot
+ 12306 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1
+.return_address
+ 12320 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66
+ 12330 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508
+ 12340 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2
+ 12350 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23
+ 12360 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32
+ 12370 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1
+ 12380 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64
+ 12390 0x16 0x28 0x21 0x98 SUB r20, r24, r2
+ 12394 0x10 0xc7 0x06 0x98 XOR r3, r3, r16
+ 12398 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2
+ 12404 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3
+ 12410 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7
+ 12416 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20
+ 12420 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1
+ 12426 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1
+ 12432 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0
+ 12438 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1
+ 12444 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16
+ 12448 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17
+ 12452 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1
+ 12458 0x11 0x37 0x07 0x98 EQ r27, r4, r16
+ 12462 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6
+ 12468 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27
+ 12472 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18
+ 12476 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1
+ 12482 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23
+ 12488 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0
+ 12494 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23
+ 12500 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29
+ 12506 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26
+ 12512 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28
+ 12518 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27
+ 12524 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22
+ 12528 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1
+ 12534 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22
+ 12540 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1
+ 12546 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30
+ 12552 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26
+ 12558 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2
+ 12564 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22
+ 12570 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30
+ 12576 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29
+ 12582 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1
+ 12592 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20
+ 12598 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22
+ 12604 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64
+ 12616 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4
+ 12620 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23
+ 12626 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12632 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23
+ 12638 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12644 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4
+ 12648 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4
+ 12652 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0
+ 12656 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7]
+ 12660 0x00 0x00 NOPX
+ 12662 0x00 0x00 NOPX
+ 12664 0x00 0x00 NOPX
+ 12666 0x00 0x00 NOPX
+ 12668 0x00 0x00 NOPX
+ 12670 0x00 0x00 NOPX
+ 12672 0x80 0x18 0xd0 0x00 0x01 0x84 JZ r16, #12704
+.delay_slot
+ 12678 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn
+.delay_slot
+ 12682 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144
+.delay_slot
+.swstall delay_slot
+ 12688 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12690 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12692 0x00 0x00 NOPX
+ 12694 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+ 12704 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032
+ 12714 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19
+ 12720 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186
+ 12726 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0
+ 12732 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 12738 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12740 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12742 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12746 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12752 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12756 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12760 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0
+.delay_slot
+.swstall delay_slot
+ 12764 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12766 0x00 0x00 NOPX
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function_start
+ 12768 0x1c 0x56 0xc0 0xf8 MOV r17, p3
+ 12772 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106
+ 12778 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2
+ 12784 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7
+ 12788 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4
+ 12792 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12
+ 12796 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4
+ 12800 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8
+ 12804 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8
+ 12808 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4
+ 12812 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12
+ 12816 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4
+ 12820 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8
+ 12824 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8
+ 12828 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4
+ 12832 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12
+ 12836 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4
+ 12840 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8
+ 12844 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8
+ 12848 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4
+ 12852 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8
+ 12856 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032
+ 12866 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1
+ 12872 0x1b 0x0f 0x10 0xb8 MOV m3, #-120
+ 12876 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0
+ 12888 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128
+ 12902 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2
+ 12912 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1
+ 12922 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112
+ 12932 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16
+ 12938 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0x78 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040
+ 12950 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xb9 0xa8 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136
+ 12962 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0
+ 12968 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0
+ 12972 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb1 0xc8 0x10 0xba LDA r5, [p3]; MOVXM p3, #13200
+ 12982 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0
+ 12992 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV
+ 13008 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13024 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13040 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13050 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13060 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13070 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13074 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13082 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13090 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13094 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13102 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13110 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0
+ 13120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13136 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13152 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13162 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13170 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13178 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13182 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13190 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.loop_nesting 1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13200 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13206 0x00 0x00 0x01 0xb7 0x44 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13216 0x80 0x85 0x70 0x00 0x01 0x8f 0x3f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13228 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13232 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13236 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13240 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13244 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13248 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13252 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13256 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13260 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13268 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13272 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13280 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13288 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13366 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0
+ 13372 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 1
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13392 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13404 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13412 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13420 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13424 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13432 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13440 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13444 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17
+ 13448 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17
+ 13452 0x00 0x00 NOPX
+ 13454 0x00 0x00 NOPX
+ 13456 0x00 0x00 NOPX
+ 13458 0x00 0x00 NOPX
+ 13460 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+ 13464 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr
+.delay_slot
+ 13470 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.delay_slot
+ 13474 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.delay_slot
+ 13478 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0
+.delay_slot
+ 13482 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4
+.delay_slot
+ 13486 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+
+.text_segment PM 13504
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 13504 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 13510 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15
+ 13516 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 13522 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID
+ 13530 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr
+ 13538 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20]
+ 13542 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12]
+ 13546 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2
+ 13554 0x80 0x1a 0xb8 0x40 0x01 0x84 JNZ r16, #13680
+.delay_slot
+ 13560 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 13564 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 13568 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 13572 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008
+.delay_slot
+ 13578 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13582 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13592 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13606 0x00 0x17 0xa0 0x00 0x01 0x04 JL #12096
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13614 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13616 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 13620 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 13624 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM
+.return_address
+ 13632 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8
+ 13640 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 13644 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024
+ 13654 0x00 0x00 NOPX
+ 13656 0x00 0x1a 0xc0 0x00 0x00 0x84 J #13696
+.delay_slot
+ 13662 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016
+.delay_slot
+.swstall delay_slot
+ 13668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13670 0x00 0x00 NOPX
+.delay_slot
+ 13672 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.delay_slot
+ 13676 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+ 13680 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+ 13696 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 13700 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008
+ 13710 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 13714 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 13718 0x02 0x46 0x56 0x98 LDA r18, [p2, #16]
+ 13722 0x00 0x00 NOPX
+ 13724 0x00 0x00 NOPX
+ 13726 0x00 0x00 NOPX
+ 13728 0x00 0x00 NOPX
+ 13730 0x00 0x00 NOPX
+ 13732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 13736 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 13740 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 13744 0x00 0x00 NOPX
+ 13746 0x00 0x00 NOPX
+ 13748 0x00 0x00 NOPX
+ 13750 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 13754 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992
+ 13764 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp
+ 13770 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76
+ 13774 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2
+ 13780 0x04 0x06 0x36 0x98 LDA r17, [p4]
+ 13784 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xc0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509824
+ 13794 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 13798 0x00 0x00 NOPX
+ 13800 0x00 0x00 NOPX
+ 13802 0x00 0x00 NOPX
+ 13804 0x05 0x06 0x76 0x98 LDA r19, [p5]
+ 13808 0x00 0x00 NOPX
+ 13810 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 13814 0x14 0xa2 0x07 0x18 ADD r17, r18, #1
+ 13818 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15
+.no_stack_arguments
+ 13822 0x00 0x18 0xf0 0x00 0x01 0x04 JL #12768
+.delay_slot
+ 13828 0x0f 0x06 0x31 0x98 ST r17, [p7]
+.delay_slot
+ 13832 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16
+.delay_slot
+ 13836 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76]
+.delay_slot
+ 13840 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72]
+.delay_slot
+ 13844 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX
+.return_address
+ 13856 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20
+ 13860 0x02 0x06 0x16 0x98 LDA r16, [p2]
+ 13864 0x00 0x00 NOPX
+ 13866 0x00 0x00 NOPX
+ 13868 0x00 0x00 NOPX
+ 13870 0x00 0x00 NOPX
+ 13872 0x00 0x00 NOPX
+ 13874 0x00 0x00 NOPX
+ 13876 0x14 0x10 0xf8 0x18 REL r16, r15
+ 13880 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024
+ 13890 0x01 0x06 0x56 0x98 LDA r18, [p1]
+ 13894 0x07 0x06 0x36 0x98 LDA r17, [p7]
+ 13898 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12]
+ 13902 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+ 13906 0x00 0x00 NOPX
+ 13908 0x00 0x00 NOPX
+ 13910 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 13914 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8]
+ 13918 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 13922 0x80 0x1b 0x40 0x40 0x01 0x84 JNZ r16, #13952
+.delay_slot
+ 13928 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 13932 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13934 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13938 0x00 0x00 NOPX
+ 13940 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 13952 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13
+ 13958 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16]
+ 13962 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 13966 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 13972 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13974 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13976 0x00 0x00 NOPX
+.delay_slot
+ 13978 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 13984
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function_start
+ 13984 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 13988 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 13992 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 13996 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 14000 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 14004 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880
+.delay_slot
+.swstall delay_slot
+ 14010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14012 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14014 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14016 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14018 0x00 0x00 NOPX
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+
+.text_segment PM 14032
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function_start
+ 14032 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 14036 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 14040 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 14044 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 14048 0x00 0x10 0x18 0x00 0x00 0x84 J #8240
+.delay_slot
+.swstall delay_slot
+ 14054 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14056 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14058 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14060 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14062 0x00 0x00 NOPX
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function_start
+ 14064 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 14068 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 14072 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 14076 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 14080 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104
+.delay_slot
+.swstall delay_slot
+ 14086 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14088 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14090 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14092 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14094 0x00 0x00 NOPX
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function_start
+ 14096 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 14100 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 14104 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 14108 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 14112 0x00 0x14 0x88 0x00 0x00 0x84 J #10512
+.delay_slot
+.swstall delay_slot
+ 14118 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14120 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14122 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14124 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14126 0x00 0x00 NOPX
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function_start
+ 14128 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 14132 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12
+ 14136 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8
+ 14140 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4]
+ 14144 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 14148 0x00 0x16 0x70 0x00 0x00 0x84 J #11488
+.delay_slot
+.swstall delay_slot
+ 14154 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14156 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14158 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14160 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14162 0x00 0x00 NOPX
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+
+.text_segment PM 14176
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function_start
+ 14176 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 14180 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 14184 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 14188 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 14192 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 14196 0x00 0x1a 0x60 0x00 0x00 0x84 J #13504
+.delay_slot
+.swstall delay_slot
+ 14202 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14204 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14206 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14208 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14210 0x00 0x00 NOPX
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+
+.text_segment PM 14224
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function_start
+ 14224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0
+ 14230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 14342 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 14346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 14350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 14354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 14358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 14362 0x18 0x9f 0xa0 0xf8 MOV r2, r31
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+
+.bss_segment DMb 508992 32
+
+.data_segment DMb 509024
+.label _ZL8num_iter
+ 0x1
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509028 4
+
+.bss_segment DMb 509032 1
+
+.rodata_segment DMb 509056
+.label _ZL20g_uniformKernelFuncs
+ 0xa0
+ 0x36
+ 0x0
+ 0x0
+ 0xd0
+ 0x36
+ 0x0
+ 0x0
+ 0xf0
+ 0x36
+ 0x0
+ 0x0
+ 0x10
+ 0x37
+ 0x0
+ 0x0
+ 0x30
+ 0x37
+ 0x0
+ 0x0
+ 0x60
+ 0x37
+ 0x0
+ 0x0
+
+.bss_segment DMb 509120 960
+
+.stack DM_stack 506560 508928
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.map
new file mode 100644
index 0000000000000000000000000000000000000000..9dbbb173fa1fb48bf48811e71fa4be69d5d5295a
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.map
@@ -0,0 +1,287 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+Memory map for memory 'DM_stack':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2368
+
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+
+Memory map for memory 'DMb':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 3393
+
+ 0x00000000..0x0007babf ( 506560 items) : Reserved
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+ 0x0007c400..0x0007c43f ( 64 items) : Reserved
+ 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL9curr_iter (Data, Local, .bss.DMb.4)
+ 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable3.o::_ZL10depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11total_iters (Data, Local, .bss.DMb.4)
+ 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL8core_row (Data, Local, .bss.DMb.4)
+ 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4)
+ 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable3.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4)
+ 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4)
+ 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL8num_iter (Data, Local, .data.DMb.4)
+ 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4)
+ 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1)
+ 0x0007c480..0x0007c497 ( 24 items) : ../Release/0_0_reloadable3.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64)
+
+ Called functions : _Z13_b896_wrapperPPv
+ _Z13_b901_wrapperPPv
+ _Z13_b906_wrapperPPv
+ _Z13_b881_wrapperPPv
+ _Z13_b891_wrapperPPv
+ _Z13_b919_wrapperPPv
+
+ 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable3.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable3.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable3.o::mul1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable3.o::clip1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c5c0..0x0007c77f ( 448 items) : ../Release/0_0_reloadable3.o::conv2d_params (Data, Global, .bss.DMb.64)
+ 0x0007c780..0x0007c87f ( 256 items) : ../Release/0_0_reloadable3.o::conv2d_dw_params (Data, Global, .bss.DMb.64)
+ 0x0007ccc0..0x000fffff ( 537408 items) : Reserved
+
+Memory map for memory 'PM':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 11754
+
+ 0x00000000..0x0000092f ( 2352 items) : Reserved
+ 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable3.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64)
+
+ Referenced symbols: _ZL20g_uniformKernelFuncs
+
+ 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable3.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64)
+ 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable3.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable3.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable3.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ conv2d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL14num_depth_iter
+ _ZL8num_iter
+ _ZL10depth_iter
+ _ZL11total_iters
+
+ 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable3.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+
+ 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable3.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ add1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable3.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable3.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable3.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ clip1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+
+ 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable3.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+
+ 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable3.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002b00..0x00002b17 ( 24 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002b20..0x00002ba9 ( 138 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+
+ 0x00002bb0..0x00002cd3 ( 292 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002ce0..0x00002f39 ( 602 items) : ../Release/0_0_reloadable3.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL11ifm2_offset
+ _ZL8num_iter
+
+ 0x00002f40..0x000031df ( 672 items) : ../Release/0_0_reloadable3.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64)
+
+ Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_
+
+ Referenced symbols: conv2d_dw_params
+ _ZN12me_primitive11control_rndE
+
+ 0x000031e0..0x000034b1 ( 722 items) : ../Release/0_0_reloadable3.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x000034c0..0x0000369d ( 478 items) : ../Release/0_0_reloadable3.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128)
+
+ Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL8num_iter
+ _ZL10ifmsv_size
+ conv2d_dw_params
+
+ 0x000036a0..0x000036c3 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x000036d0..0x000036ef ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x000036f0..0x0000370f ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003710..0x0000372f ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003730..0x00003753 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+
+ 0x00003760..0x00003783 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003790..0x0000381d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0)
+
+External symbols:
+
+ __dso_handle = 0x0
+ _ctors_end = 0x0
+ _ctors_start = 0x0
+ _dtors_end = 0x0
+ _dtors_start = 0x0
+ _pc_end = 0x381e
+ _pc_start = 0x930
+ _sp_end_DM_stack = 0x7c400
+ _sp_start_DM_stack = 0x7bac0
+
+Section summary for memory 'DM_stack':
+
+ .stack File
+ ---------- ----------
+ 2368
+ ---------- ----------
+ 2368 Total
+
+Section summary for memory 'DMb':
+
+ .bss .data .rodata File
+ ---------- ---------- ---------- ----------
+ 992 4 24 ../Release/0_0_reloadable3.o
+ 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ---------- ---------- ----------
+ 997 4 24 Total
+
+Section summary for memory 'PM':
+
+ .text File
+ ---------- ----------
+ 11612 ../Release/0_0_reloadable3.o
+ 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ----------
+ 11754 Total
+
+File summary:
+
+../Release/0_0_reloadable3.o
+ DMb 1020
+ PM 11612
+
+me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ DMb 5
+
+me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ PM 142
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.sdr
new file mode 100644
index 0000000000000000000000000000000000000000..531300c36b89212abd7e8ea03e380e975fc5f93c
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.sdr
@@ -0,0 +1,123 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+// Symbols in memory 'DM_bankA':
+// Symbols in memory 'DM_bankAB':
+// Symbols in memory 'DM_bankAC':
+// Symbols in memory 'DM_bankAD':
+// Symbols in memory 'DM_bankB':
+// Symbols in memory 'DM_bankBC':
+// Symbols in memory 'DM_bankBD':
+// Symbols in memory 'DM_bankC':
+// Symbols in memory 'DM_bankCD':
+// Symbols in memory 'DM_bankD':
+// Symbols in memory 'DM_stack':
+// Symbols in memory 'DM_test':
+// Symbols in memory 'DMb':
+_symbol _ZN12me_primitive11control_satE 0x0007c464
+_symbol _ZN12me_primitive11control_rndE 0x0007c468
+_symbol add1d_attribute_broadcasting_params 0x0007c4c0
+_symbol mul1d_attribute_broadcasting_params 0x0007c500
+_symbol mul1d_params 0x0007c540
+_symbol clip1d_params 0x0007c580
+_symbol conv2d_params 0x0007c5c0
+_symbol conv2d_dw_params 0x0007c780
+// Symbols in memory 'DMh':
+// Symbols in memory 'DMh_bankA':
+// Symbols in memory 'DMh_bankAB':
+// Symbols in memory 'DMh_bankAC':
+// Symbols in memory 'DMh_bankAD':
+// Symbols in memory 'DMh_bankB':
+// Symbols in memory 'DMh_bankBC':
+// Symbols in memory 'DMh_bankBD':
+// Symbols in memory 'DMh_bankC':
+// Symbols in memory 'DMh_bankCD':
+// Symbols in memory 'DMh_bankD':
+// Symbols in memory 'DMh_stack':
+// Symbols in memory 'DMs':
+// Symbols in memory 'DMs_bankA':
+// Symbols in memory 'DMs_bankAB':
+// Symbols in memory 'DMs_bankAC':
+// Symbols in memory 'DMs_bankAD':
+// Symbols in memory 'DMs_bankB':
+// Symbols in memory 'DMs_bankBC':
+// Symbols in memory 'DMs_bankBD':
+// Symbols in memory 'DMs_bankC':
+// Symbols in memory 'DMs_bankCD':
+// Symbols in memory 'DMs_bankD':
+// Symbols in memory 'DMs_stack':
+// Symbols in memory 'DMv':
+// Symbols in memory 'DMv_bankA':
+// Symbols in memory 'DMv_bankAB':
+// Symbols in memory 'DMv_bankAC':
+// Symbols in memory 'DMv_bankAD':
+// Symbols in memory 'DMv_bankB':
+// Symbols in memory 'DMv_bankBC':
+// Symbols in memory 'DMv_bankBD':
+// Symbols in memory 'DMv_bankC':
+// Symbols in memory 'DMv_bankCD':
+// Symbols in memory 'DMv_bankD':
+// Symbols in memory 'DMv_stack':
+// Symbols in memory 'DMw':
+// Symbols in memory 'DMw_bankA':
+// Symbols in memory 'DMw_bankAB':
+// Symbols in memory 'DMw_bankAC':
+// Symbols in memory 'DMw_bankAD':
+// Symbols in memory 'DMw_bankB':
+// Symbols in memory 'DMw_bankBC':
+// Symbols in memory 'DMw_bankBD':
+// Symbols in memory 'DMw_bankC':
+// Symbols in memory 'DMw_bankCD':
+// Symbols in memory 'DMw_bankD':
+// Symbols in memory 'DMw_stack':
+// Symbols in memory 'DMx':
+// Symbols in memory 'DMx_bankA':
+// Symbols in memory 'DMx_bankAB':
+// Symbols in memory 'DMx_bankAC':
+// Symbols in memory 'DMx_bankAD':
+// Symbols in memory 'DMx_bankB':
+// Symbols in memory 'DMx_bankBC':
+// Symbols in memory 'DMx_bankBD':
+// Symbols in memory 'DMx_bankC':
+// Symbols in memory 'DMx_bankCD':
+// Symbols in memory 'DMx_bankD':
+// Symbols in memory 'DMx_stack':
+// Symbols in memory 'PM':
+_symbol _Z13kernelWrapperPPvjjjj 0x00000930
+_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0
+_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060
+_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170
+_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70
+_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0
+_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290
+_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870
+_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002b00
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002b20
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002bb0
+_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002ce0
+_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000031e0
+_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000034c0
+_symbol _Z13_b896_wrapperPPv 0x000036a0
+_symbol _Z13_b901_wrapperPPv 0x000036d0
+_symbol _Z13_b906_wrapperPPv 0x000036f0
+_symbol _Z13_b881_wrapperPPv 0x00003710
+_symbol _Z13_b891_wrapperPPv 0x00003730
+_symbol _Z13_b919_wrapperPPv 0x00003760
+_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003790
+// Symbols in memory 'PMw':
+// Symbols in memory 'TM4':
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.srv
new file mode 100644
index 0000000000000000000000000000000000000000..f6eec0e2b8bd493ef0112849914646d03f76489e
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.srv
@@ -0,0 +1,17226 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:03 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable3.cc" 82 first
+.src_ref 0 "0_0_reloadable3.cc" 84 60 first
+.src_ref 0 "0_0_reloadable3.cc" 84 110
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 82
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 84 110
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 84 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 86 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 90 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 93 60
+.src_ref 0 "0_0_reloadable3.cc" 95 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 93 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 95 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 95 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 98
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "10000000" // /* MW 5 */
+ 6942 "10101011" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "10000000" // /* MW 5 */
+ 7224 "11001011" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "00000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11000000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11000000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11008 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000001" // /* MW 5 */
+ 11010 "00100001" // /* MW 4 */
+ 11011 "00000000" // /* MW 3 */
+ 11012 "00000000" // /* MW 2 */
+ 11013 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11014 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11015 "11000000" // /* MW 3 */
+ 11016 "01010000" // /* MW 2 */
+ 11017 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11018 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "10010000" // /* MW 3 */
+ 11020 "01100000" // /* MW 2 */
+ 11021 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11022 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11023 "00010001" // /* MW 3 */
+ 11024 "00000100" // /* MW 2 */
+ 11025 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11026 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11027 "00010001" // /* MW 3 */
+ 11028 "00010100" // /* MW 2 */
+ 11029 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11031 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11040 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11041 "00101110" // /* MW 3 */
+ 11042 "00011100" // /* MW 2 */
+ 11043 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11044 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11045 "00000001" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00001000" // /* MW 2 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00111101" // /* MW 3 */
+ 11052 "11111100" // /* MW 2 */
+ 11053 "00001111" // /* MW 1 */
+ 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11055 "00000000" // /* MW 1 */
+ 11056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11057 "00000000" // /* MW 1 */
+ 11058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11059 "00000000" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11062 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "00101001" // /* MW 3 */
+ 11064 "00011100" // /* MW 2 */
+ 11065 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11066 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11067 "00101110" // /* MW 3 */
+ 11068 "00011100" // /* MW 2 */
+ 11069 "00000001" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+ 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11073 "00000000" // /* MW 1 */
+ 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11075 "00000000" // /* MW 1 */
+ 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11077 "00000000" // /* MW 1 */
+ 11078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11079 "00000000" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11082 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00101001" // /* MW 3 */
+ 11084 "00011100" // /* MW 2 */
+ 11085 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11086 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11087 "00101110" // /* MW 3 */
+ 11088 "00000100" // /* MW 2 */
+ 11089 "00000001" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+ 11092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11093 "00000000" // /* MW 1 */
+ 11094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11095 "00000000" // /* MW 1 */
+ 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11097 "00000000" // /* MW 1 */
+ 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11099 "00000000" // /* MW 1 */
+ 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11101 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11102 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11103 "00101001" // /* MW 3 */
+ 11104 "00011100" // /* MW 2 */
+ 11105 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11106 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11107 "00101110" // /* MW 3 */
+ 11108 "00010100" // /* MW 2 */
+ 11109 "00000001" // /* MW 1 */
+ 11110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11111 "00000000" // /* MW 1 */
+ 11112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11113 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11114 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11115 "00000001" // /* MW 5 */
+ 11116 "00000000" // /* MW 4 */
+ 11117 "10000000" // /* MW 3 */
+ 11118 "00010101" // /* MW 2 */
+ 11119 "00000000" // /* MW 1 */
+.delay_slot
+ 11120 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11121 "10011101" // /* MW 3 */
+ 11122 "11111011" // /* MW 2 */
+ 11123 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11128 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11129 "00101001" // /* MW 3 */
+ 11130 "11011100" // /* MW 2 */
+ 11131 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11132 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11133 "11000000" // /* MW 3 */
+ 11134 "01100000" // /* MW 2 */
+ 11135 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11136 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11137 "00111001" // /* MW 3 */
+ 11138 "11111100" // /* MW 2 */
+ 11139 "00000111" // /* MW 1 */
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11150 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10011001" // /* MW 3 */
+ 11152 "11111011" // /* MW 2 */
+ 11153 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11154 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11155 "00000000" // /* MW 3 */
+ 11156 "00101000" // /* MW 2 */
+ 11157 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11161 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11163 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11164 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11165 "00000001" // /* MW 3 */
+ 11166 "00100000" // /* MW 2 */
+ 11167 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11168 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "01110001" // /* MW 9 */
+ 11170 "00000000" // /* MW 8 */
+ 11171 "00000000" // /* MW 7 */
+ 11172 "00000000" // /* MW 6 */
+ 11173 "11111110" // /* MW 5 */
+ 11174 "00111111" // /* MW 4 */
+ 11175 "00110000" // /* MW 3 */
+ 11176 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11177 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11184 "10111010" // MOVA m0, #32; MOVXM ls, #11360 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11185 "00010000" // /* MW 9 */
+ 11186 "00110000" // /* MW 8 */
+ 11187 "01111110" // /* MW 7 */
+ 11188 "00001000" // /* MW 6 */
+ 11189 "00000000" // /* MW 5 */
+ 11190 "00000000" // /* MW 4 */
+ 11191 "10000000" // /* MW 3 */
+ 11192 "00000000" // /* MW 2 */
+ 11193 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11194 "10111010" // LDA r3, [p3], m0; MOVXM le, #11376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11195 "00010000" // /* MW 9 */
+ 11196 "00111000" // /* MW 8 */
+ 11197 "10111110" // /* MW 7 */
+ 11198 "00001001" // /* MW 6 */
+ 11199 "00000000" // /* MW 5 */
+ 11200 "00000000" // /* MW 4 */
+ 11201 "11010000" // /* MW 3 */
+ 11202 "00001110" // /* MW 2 */
+ 11203 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11204 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11205 "01011000" // /* MW 9 */
+ 11206 "00111100" // /* MW 8 */
+ 11207 "00001011" // /* MW 7 */
+ 11208 "01001000" // /* MW 6 */
+ 11209 "00010111" // /* MW 5 */
+ 11210 "00111110" // /* MW 4 */
+ 11211 "11010000" // /* MW 3 */
+ 11212 "10010000" // /* MW 2 */
+ 11213 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11214 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11215 "00010000" // /* MW 9 */
+ 11216 "00110100" // /* MW 8 */
+ 11217 "00110010" // /* MW 7 */
+ 11218 "11110010" // /* MW 6 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "11010000" // /* MW 3 */
+ 11222 "10000000" // /* MW 2 */
+ 11223 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11224 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "01000010" // /* MW 3 */
+ 11226 "00000100" // /* MW 2 */
+ 11227 "00000100" // /* MW 1 */
+ 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11229 "00000000" // /* MW 1 */
+ 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11231 "00000000" // /* MW 1 */
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11233 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11234 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11235 "00011101" // /* MW 3 */
+ 11236 "11000010" // /* MW 2 */
+ 11237 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11238 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11239 "11111001" // /* MW 5 */
+ 11240 "11100001" // /* MW 4 */
+ 11241 "10001010" // /* MW 3 */
+ 11242 "00001110" // /* MW 2 */
+ 11243 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11244 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11245 "01101000" // /* MW 5 */
+ 11246 "01010000" // /* MW 4 */
+ 11247 "01110000" // /* MW 3 */
+ 11248 "00010011" // /* MW 2 */
+ 11249 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11250 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11251 "10000000" // /* MW 7 */
+ 11252 "10111010" // /* MW 6 */
+ 11253 "11101000" // /* MW 5 */
+ 11254 "01010000" // /* MW 4 */
+ 11255 "01110000" // /* MW 3 */
+ 11256 "00011011" // /* MW 2 */
+ 11257 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11258 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11259 "01101000" // /* MW 5 */
+ 11260 "01010000" // /* MW 4 */
+ 11261 "01110000" // /* MW 3 */
+ 11262 "00010011" // /* MW 2 */
+ 11263 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11264 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11265 "11101000" // /* MW 5 */
+ 11266 "01010000" // /* MW 4 */
+ 11267 "01110000" // /* MW 3 */
+ 11268 "00011011" // /* MW 2 */
+ 11269 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11270 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "10011011" // /* MW 3 */
+ 11272 "00001000" // /* MW 2 */
+ 11273 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11274 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11275 "01101000" // /* MW 5 */
+ 11276 "01010000" // /* MW 4 */
+ 11277 "01110000" // /* MW 3 */
+ 11278 "00011011" // /* MW 2 */
+ 11279 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11280 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11281 "11101000" // /* MW 5 */
+ 11282 "01010000" // /* MW 4 */
+ 11283 "01110000" // /* MW 3 */
+ 11284 "00010011" // /* MW 2 */
+ 11285 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11286 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11287 "01000001" // /* MW 9 */
+ 11288 "11100010" // /* MW 8 */
+ 11289 "00000000" // /* MW 7 */
+ 11290 "00011101" // /* MW 6 */
+ 11291 "00110100" // /* MW 5 */
+ 11292 "00101000" // /* MW 4 */
+ 11293 "01110000" // /* MW 3 */
+ 11294 "00011011" // /* MW 2 */
+ 11295 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11296 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11297 "01100001" // /* MW 9 */
+ 11298 "11100000" // /* MW 8 */
+ 11299 "00000001" // /* MW 7 */
+ 11300 "00011101" // /* MW 6 */
+ 11301 "01110100" // /* MW 5 */
+ 11302 "00101000" // /* MW 4 */
+ 11303 "01110000" // /* MW 3 */
+ 11304 "00010011" // /* MW 2 */
+ 11305 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11306 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11307 "01000001" // /* MW 9 */
+ 11308 "11100010" // /* MW 8 */
+ 11309 "00000000" // /* MW 7 */
+ 11310 "00011101" // /* MW 6 */
+ 11311 "00110100" // /* MW 5 */
+ 11312 "00101000" // /* MW 4 */
+ 11313 "01110000" // /* MW 3 */
+ 11314 "00011011" // /* MW 2 */
+ 11315 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11316 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11317 "01100001" // /* MW 9 */
+ 11318 "11100000" // /* MW 8 */
+ 11319 "00000001" // /* MW 7 */
+ 11320 "00011101" // /* MW 6 */
+ 11321 "01110100" // /* MW 5 */
+ 11322 "00101000" // /* MW 4 */
+ 11323 "01110000" // /* MW 3 */
+ 11324 "00010011" // /* MW 2 */
+ 11325 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11326 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11327 "01000001" // /* MW 9 */
+ 11328 "11100010" // /* MW 8 */
+ 11329 "00000000" // /* MW 7 */
+ 11330 "00011101" // /* MW 6 */
+ 11331 "00110100" // /* MW 5 */
+ 11332 "00101000" // /* MW 4 */
+ 11333 "01110000" // /* MW 3 */
+ 11334 "00011011" // /* MW 2 */
+ 11335 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11336 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11337 "01100001" // /* MW 9 */
+ 11338 "11100000" // /* MW 8 */
+ 11339 "00000001" // /* MW 7 */
+ 11340 "00011101" // /* MW 6 */
+ 11341 "01110100" // /* MW 5 */
+ 11342 "00101000" // /* MW 4 */
+ 11343 "01110000" // /* MW 3 */
+ 11344 "00010011" // /* MW 2 */
+ 11345 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11346 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11347 "01000001" // /* MW 13 */
+ 11348 "11100010" // /* MW 12 */
+ 11349 "00000000" // /* MW 11 */
+ 11350 "10001100" // /* MW 10 */
+ 11351 "01110000" // /* MW 9 */
+ 11352 "00001000" // /* MW 8 */
+ 11353 "00000000" // /* MW 7 */
+ 11354 "00000000" // /* MW 6 */
+ 11355 "01101000" // /* MW 5 */
+ 11356 "01010000" // /* MW 4 */
+ 11357 "01110000" // /* MW 3 */
+ 11358 "00011011" // /* MW 2 */
+ 11359 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11360 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11361 "00000011" // /* MW 15 */
+ 11362 "00001111" // /* MW 14 */
+ 11363 "01111000" // /* MW 13 */
+ 11364 "10100101" // /* MW 12 */
+ 11365 "00000001" // /* MW 11 */
+ 11366 "00000000" // /* MW 10 */
+ 11367 "00000000" // /* MW 9 */
+ 11368 "00000000" // /* MW 8 */
+ 11369 "10100011" // /* MW 7 */
+ 11370 "00011100" // /* MW 6 */
+ 11371 "11101010" // /* MW 5 */
+ 11372 "01010000" // /* MW 4 */
+ 11373 "01110000" // /* MW 3 */
+ 11374 "00010011" // /* MW 2 */
+ 11375 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11376 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11377 "00010010" // /* MW 15 */
+ 11378 "00000111" // /* MW 14 */
+ 11379 "01111000" // /* MW 13 */
+ 11380 "10100101" // /* MW 12 */
+ 11381 "00000001" // /* MW 11 */
+ 11382 "00000000" // /* MW 10 */
+ 11383 "00000000" // /* MW 9 */
+ 11384 "00000000" // /* MW 8 */
+ 11385 "00100011" // /* MW 7 */
+ 11386 "00011100" // /* MW 6 */
+ 11387 "01101010" // /* MW 5 */
+ 11388 "01010000" // /* MW 4 */
+ 11389 "01110000" // /* MW 3 */
+ 11390 "00011011" // /* MW 2 */
+ 11391 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11392 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11393 "01100001" // /* MW 7 */
+ 11394 "11100000" // /* MW 6 */
+ 11395 "00000001" // /* MW 5 */
+ 11396 "00000010" // /* MW 4 */
+ 11397 "01100000" // /* MW 3 */
+ 11398 "10010100" // /* MW 2 */
+ 11399 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11400 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11401 "01000001" // /* MW 7 */
+ 11402 "11100010" // /* MW 6 */
+ 11403 "00000000" // /* MW 5 */
+ 11404 "00000010" // /* MW 4 */
+ 11405 "01100000" // /* MW 3 */
+ 11406 "10000100" // /* MW 2 */
+ 11407 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11408 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11409 "01100001" // /* MW 7 */
+ 11410 "11100000" // /* MW 6 */
+ 11411 "00000001" // /* MW 5 */
+ 11412 "00000010" // /* MW 4 */
+ 11413 "01100000" // /* MW 3 */
+ 11414 "10010100" // /* MW 2 */
+ 11415 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11417 "01000001" // /* MW 7 */
+ 11418 "11100010" // /* MW 6 */
+ 11419 "00000000" // /* MW 5 */
+ 11420 "00000010" // /* MW 4 */
+ 11421 "01100000" // /* MW 3 */
+ 11422 "10000100" // /* MW 2 */
+ 11423 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11425 "01100001" // /* MW 7 */
+ 11426 "11100000" // /* MW 6 */
+ 11427 "00000001" // /* MW 5 */
+ 11428 "00000010" // /* MW 4 */
+ 11429 "01100000" // /* MW 3 */
+ 11430 "10010100" // /* MW 2 */
+ 11431 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11432 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11433 "01000001" // /* MW 7 */
+ 11434 "11100010" // /* MW 6 */
+ 11435 "00000000" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "01100000" // /* MW 3 */
+ 11438 "10000100" // /* MW 2 */
+ 11439 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11440 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11441 "01100001" // /* MW 7 */
+ 11442 "11100000" // /* MW 6 */
+ 11443 "00000001" // /* MW 5 */
+ 11444 "00000010" // /* MW 4 */
+ 11445 "01100000" // /* MW 3 */
+ 11446 "10010100" // /* MW 2 */
+ 11447 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11448 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11449 "00100011" // /* MW 3 */
+ 11450 "00011100" // /* MW 2 */
+ 11451 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11452 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11453 "00000000" // /* MW 5 */
+ 11454 "01010000" // /* MW 4 */
+ 11455 "01100000" // /* MW 3 */
+ 11456 "10010100" // /* MW 2 */
+ 11457 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11458 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11459 "00100011" // /* MW 3 */
+ 11460 "00011100" // /* MW 2 */
+ 11461 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11462 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11463 "10100011" // /* MW 3 */
+ 11464 "00011100" // /* MW 2 */
+ 11465 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11466 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11467 "00100011" // /* MW 3 */
+ 11468 "00011100" // /* MW 2 */
+ 11469 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11471 "10100011" // /* MW 3 */
+ 11472 "00011100" // /* MW 2 */
+ 11473 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11475 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11488 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "10000000" // /* MW 5 */
+ 11490 "11001000" // /* MW 4 */
+ 11491 "11001000" // /* MW 3 */
+ 11492 "00000111" // /* MW 2 */
+ 11493 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11494 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11000001" // /* MW 5 */
+ 11496 "10110101" // /* MW 4 */
+ 11497 "11011000" // /* MW 3 */
+ 11498 "11000010" // /* MW 2 */
+ 11499 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "00000001" // /* MW 5 */
+ 11502 "00000000" // /* MW 4 */
+ 11503 "00000000" // /* MW 3 */
+ 11504 "00001000" // /* MW 2 */
+ 11505 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11506 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11507 "01111001" // /* MW 9 */
+ 11508 "01100000" // /* MW 8 */
+ 11509 "11001010" // /* MW 7 */
+ 11510 "10000001" // /* MW 6 */
+ 11511 "00010100" // /* MW 5 */
+ 11512 "00100011" // /* MW 4 */
+ 11513 "10110000" // /* MW 3 */
+ 11514 "00111010" // /* MW 2 */
+ 11515 "11111111" // /* MW 1 */
+ 11516 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11517 "01110000" // /* MW 7 */
+ 11518 "11010000" // /* MW 6 */
+ 11519 "00001011" // /* MW 5 */
+ 11520 "00000000" // /* MW 4 */
+ 11521 "10110000" // /* MW 3 */
+ 11522 "10000011" // /* MW 2 */
+ 11523 "11111101" // /* MW 1 */
+ 11524 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "00010101" // /* MW 3 */
+ 11526 "11111100" // /* MW 2 */
+ 11527 "00001111" // /* MW 1 */
+ 11528 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11529 "00111101" // /* MW 3 */
+ 11530 "11110000" // /* MW 2 */
+ 11531 "00001111" // /* MW 1 */
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11534 "10000100" // JNZ r16, #11680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11680 delay_slots=5 */
+ 11535 "00000001" // /* MW 5 */
+ 11536 "01000000" // /* MW 4 */
+ 11537 "11010000" // /* MW 3 */
+ 11538 "00010110" // /* MW 2 */
+ 11539 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11540 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11541 "11111011" // /* MW 3 */
+ 11542 "01100011" // /* MW 2 */
+ 11543 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11544 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11545 "10100000" // /* MW 5 */
+ 11546 "11001000" // /* MW 4 */
+ 11547 "11000100" // /* MW 3 */
+ 11548 "00000111" // /* MW 2 */
+ 11549 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11550 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11551 "01110000" // /* MW 7 */
+ 11552 "01100000" // /* MW 6 */
+ 11553 "00110111" // /* MW 5 */
+ 11554 "00000001" // /* MW 4 */
+ 11555 "00110000" // /* MW 3 */
+ 11556 "11000110" // /* MW 2 */
+ 11557 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11558 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "11010110" // /* MW 2 */
+ 11561 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11562 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "00010001" // /* MW 9 */
+ 11564 "10100000" // /* MW 8 */
+ 11565 "10110010" // /* MW 7 */
+ 11566 "11110011" // /* MW 6 */
+ 11567 "00000001" // /* MW 5 */
+ 11568 "00000000" // /* MW 4 */
+ 11569 "10110000" // /* MW 3 */
+ 11570 "10100011" // /* MW 2 */
+ 11571 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11572 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "00010001" // /* MW 9 */
+ 11574 "00110100" // /* MW 8 */
+ 11575 "00110010" // /* MW 7 */
+ 11576 "11110001" // /* MW 6 */
+ 11577 "00000001" // /* MW 5 */
+ 11578 "00000000" // /* MW 4 */
+ 11579 "01100000" // /* MW 3 */
+ 11580 "10010001" // /* MW 2 */
+ 11581 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11582 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "00010000" // /* MW 9 */
+ 11584 "00110010" // /* MW 8 */
+ 11585 "00110010" // /* MW 7 */
+ 11586 "11110001" // /* MW 6 */
+ 11587 "00000001" // /* MW 5 */
+ 11588 "00000000" // /* MW 4 */
+ 11589 "11100000" // /* MW 3 */
+ 11590 "11000000" // /* MW 2 */
+ 11591 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11593 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11594 "00000100" // JL #11040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 11595 "00000001" // /* MW 5 */
+ 11596 "00000000" // /* MW 4 */
+ 11597 "10010000" // /* MW 3 */
+ 11598 "00010101" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11603 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11604 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11605 "00110001" // /* MW 3 */
+ 11606 "00100000" // /* MW 2 */
+ 11607 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11608 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11609 "00000101" // /* MW 3 */
+ 11610 "00100000" // /* MW 2 */
+ 11611 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11612 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11613 "00010001" // /* MW 3 */
+ 11614 "00000110" // /* MW 2 */
+ 11615 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11616 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11617 "00010000" // /* MW 9 */
+ 11618 "00101000" // /* MW 8 */
+ 11619 "10110010" // /* MW 7 */
+ 11620 "11110000" // /* MW 6 */
+ 11621 "00000001" // /* MW 5 */
+ 11622 "00000000" // /* MW 4 */
+ 11623 "11010000" // /* MW 3 */
+ 11624 "11000010" // /* MW 2 */
+ 11625 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11626 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11627 "00010000" // /* MW 9 */
+ 11628 "00101010" // /* MW 8 */
+ 11629 "10110010" // /* MW 7 */
+ 11630 "11110001" // /* MW 6 */
+ 11631 "00000001" // /* MW 5 */
+ 11632 "00000000" // /* MW 4 */
+ 11633 "11010000" // /* MW 3 */
+ 11634 "11000110" // /* MW 2 */
+ 11635 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11636 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11637 "00010000" // /* MW 9 */
+ 11638 "00101110" // /* MW 8 */
+ 11639 "10110010" // /* MW 7 */
+ 11640 "11110000" // /* MW 6 */
+ 11641 "00000001" // /* MW 5 */
+ 11642 "00000000" // /* MW 4 */
+ 11643 "01010000" // /* MW 3 */
+ 11644 "11001011" // /* MW 2 */
+ 11645 "11101010" // /* MW 1 */
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+ 11648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11649 "00000000" // /* MW 1 */
+ 11650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11651 "00000000" // /* MW 1 */
+ 11652 "10000100" // J #11696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */
+ 11653 "00000000" // /* MW 5 */
+ 11654 "00000000" // /* MW 4 */
+ 11655 "11011000" // /* MW 3 */
+ 11656 "00010110" // /* MW 2 */
+ 11657 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11658 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11659 "11000000" // /* MW 5 */
+ 11660 "11001000" // /* MW 4 */
+ 11661 "11000100" // /* MW 3 */
+ 11662 "00000111" // /* MW 2 */
+ 11663 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11664 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11665 "00001111" // /* MW 3 */
+ 11666 "01100001" // /* MW 2 */
+ 11667 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11668 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11669 "01010001" // /* MW 3 */
+ 11670 "00000110" // /* MW 2 */
+ 11671 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11672 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11673 "00010001" // /* MW 3 */
+ 11674 "00000110" // /* MW 2 */
+ 11675 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11676 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11677 "00010001" // /* MW 3 */
+ 11678 "00000110" // /* MW 2 */
+ 11679 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11680 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11681 "10101000" // /* MW 5 */
+ 11682 "11001000" // /* MW 4 */
+ 11683 "11000110" // /* MW 3 */
+ 11684 "00000111" // /* MW 2 */
+ 11685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11686 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11687 "00010000" // /* MW 9 */
+ 11688 "00101110" // /* MW 8 */
+ 11689 "10110010" // /* MW 7 */
+ 11690 "11110000" // /* MW 6 */
+ 11691 "00000001" // /* MW 5 */
+ 11692 "00000000" // /* MW 4 */
+ 11693 "11110000" // /* MW 3 */
+ 11694 "00101100" // /* MW 2 */
+ 11695 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11696 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11697 "10000110" // /* MW 3 */
+ 11698 "01100111" // /* MW 2 */
+ 11699 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11700 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11701 "00010000" // /* MW 9 */
+ 11702 "00100000" // /* MW 8 */
+ 11703 "00110010" // /* MW 7 */
+ 11704 "11110001" // /* MW 6 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00000000" // /* MW 4 */
+ 11707 "11010000" // /* MW 3 */
+ 11708 "11101110" // /* MW 2 */
+ 11709 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11710 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00010110" // /* MW 3 */
+ 11712 "11111110" // /* MW 2 */
+ 11713 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11714 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00110110" // /* MW 3 */
+ 11716 "11111110" // /* MW 2 */
+ 11717 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11718 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "01010110" // /* MW 3 */
+ 11720 "00000110" // /* MW 2 */
+ 11721 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11722 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "01110110" // /* MW 3 */
+ 11724 "01000110" // /* MW 2 */
+ 11725 "00000000" // /* MW 1 */
+ 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11727 "00000000" // /* MW 1 */
+ 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11729 "00000000" // /* MW 1 */
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11731 "00000000" // /* MW 1 */
+ 11732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11733 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11734 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000010" // /* MW 3 */
+ 11736 "01100001" // /* MW 2 */
+ 11737 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11738 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11739 "00001110" // /* MW 5 */
+ 11740 "01000000" // /* MW 4 */
+ 11741 "00111001" // /* MW 3 */
+ 11742 "11000010" // /* MW 2 */
+ 11743 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 11744 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11745 "00010001" // /* MW 3 */
+ 11746 "00000110" // /* MW 2 */
+ 11747 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 11748 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11749 "11111101" // /* MW 3 */
+ 11750 "11100000" // /* MW 2 */
+ 11751 "00010111" // /* MW 1 */
+ 11752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11753 "00000000" // /* MW 1 */
+ 11754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11755 "00000000" // /* MW 1 */
+ 11756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11757 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 11758 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11759 "00001000" // /* MW 3 */
+ 11760 "11010011" // /* MW 2 */
+ 11761 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11762 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11763 "00000110" // /* MW 3 */
+ 11764 "01100111" // /* MW 2 */
+ 11765 "00011010" // /* MW 1 */
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11770 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "01110110" // /* MW 3 */
+ 11772 "11111111" // /* MW 2 */
+ 11773 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11774 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11775 "00110110" // /* MW 3 */
+ 11776 "11111110" // /* MW 2 */
+ 11777 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11778 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11779 "01010110" // /* MW 3 */
+ 11780 "11111110" // /* MW 2 */
+ 11781 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 11782 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11783 "01110110" // /* MW 3 */
+ 11784 "01010110" // /* MW 2 */
+ 11785 "00000010" // /* MW 1 */
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+ 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11791 "00000000" // /* MW 1 */
+ 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11793 "00000000" // /* MW 1 */
+ 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11795 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11796 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "00010010" // /* MW 3 */
+ 11798 "10100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11800 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11801 "00110001" // /* MW 3 */
+ 11802 "00000110" // /* MW 2 */
+ 11803 "00001010" // /* MW 1 */
+ 11804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11805 "00000000" // /* MW 1 */
+ 11806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11807 "00000000" // /* MW 1 */
+ 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11809 "00000000" // /* MW 1 */
+ 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 11812 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00001000" // /* MW 3 */
+ 11814 "11010011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 11816 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11817 "01111001" // /* MW 9 */
+ 11818 "01100000" // /* MW 8 */
+ 11819 "11001110" // /* MW 7 */
+ 11820 "00101001" // /* MW 6 */
+ 11821 "00000000" // /* MW 5 */
+ 11822 "00000001" // /* MW 4 */
+ 11823 "01100000" // /* MW 3 */
+ 11824 "00010001" // /* MW 2 */
+ 11825 "11010001" // /* MW 1 */
+ 11826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11827 "00000000" // /* MW 1 */
+ 11828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11829 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 11830 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11831 "00011001" // /* MW 3 */
+ 11832 "11101110" // /* MW 2 */
+ 11833 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 11834 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11835 "00111011" // /* MW 5 */
+ 11836 "11011000" // /* MW 4 */
+ 11837 "11011111" // /* MW 3 */
+ 11838 "11000110" // /* MW 2 */
+ 11839 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 11840 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11841 "10000001" // /* MW 5 */
+ 11842 "11011101" // /* MW 4 */
+ 11843 "11010110" // /* MW 3 */
+ 11844 "11010010" // /* MW 2 */
+ 11845 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 11846 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01010110" // /* MW 3 */
+ 11848 "01001110" // /* MW 2 */
+ 11849 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 11850 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00011110" // /* MW 3 */
+ 11852 "01011101" // /* MW 2 */
+ 11853 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11854 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11000000" // /* MW 3 */
+ 11856 "01100000" // /* MW 2 */
+ 11857 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11860 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "01110110" // /* MW 3 */
+ 11862 "00000110" // /* MW 2 */
+ 11863 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11865 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11866 "00000100" // JL #11184 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11184 delay_slots=5 */
+ 11867 "00000001" // /* MW 5 */
+ 11868 "00000000" // /* MW 4 */
+ 11869 "11011000" // /* MW 3 */
+ 11870 "00010101" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11872 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11873 "11000000" // /* MW 3 */
+ 11874 "11010100" // /* MW 2 */
+ 11875 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 11876 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11877 "00001101" // /* MW 3 */
+ 11878 "01100011" // /* MW 2 */
+ 11879 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 11880 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11881 "00001101" // /* MW 3 */
+ 11882 "00100001" // /* MW 2 */
+ 11883 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 11884 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11885 "01000001" // /* MW 3 */
+ 11886 "01101001" // /* MW 2 */
+ 11887 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "10101000" // /* MW 13 */
+ 11892 "11100010" // /* MW 12 */
+ 11893 "00110100" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 11904 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11905 "01111000" // /* MW 9 */
+ 11906 "11010000" // /* MW 8 */
+ 11907 "10110011" // /* MW 7 */
+ 11908 "00101000" // /* MW 6 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000001" // /* MW 4 */
+ 11911 "11010000" // /* MW 3 */
+ 11912 "11000110" // /* MW 2 */
+ 11913 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 11914 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11001100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+ 11920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11921 "00000000" // /* MW 1 */
+ 11922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11923 "00000000" // /* MW 1 */
+ 11924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11925 "00000000" // /* MW 1 */
+ 11926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11927 "00000000" // /* MW 1 */
+ 11928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11929 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 11930 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11931 "00001000" // /* MW 3 */
+ 11932 "01010001" // /* MW 2 */
+ 11933 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 11934 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11935 "00110110" // /* MW 3 */
+ 11936 "11110110" // /* MW 2 */
+ 11937 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 11938 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11939 "00011001" // /* MW 3 */
+ 11940 "11101101" // /* MW 2 */
+ 11941 "00000111" // /* MW 1 */
+ 11942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11943 "00000000" // /* MW 1 */
+ 11944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11945 "00000000" // /* MW 1 */
+ 11946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11947 "00000000" // /* MW 1 */
+ 11948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11949 "00000000" // /* MW 1 */
+ 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11951 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 11952 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "00010001" // /* MW 3 */
+ 11954 "00100011" // /* MW 2 */
+ 11955 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 11956 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11957 "01100011" // /* MW 5 */
+ 11958 "11101100" // /* MW 4 */
+ 11959 "11010011" // /* MW 3 */
+ 11960 "11000110" // /* MW 2 */
+ 11961 "01001010" // /* MW 1 */
+ 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11963 "00000000" // /* MW 1 */
+ 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11965 "00000000" // /* MW 1 */
+ 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11967 "00000000" // /* MW 1 */
+ 11968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11969 "00000000" // /* MW 1 */
+ 11970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11971 "00000000" // /* MW 1 */
+ 11972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 11974 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "00001000" // /* MW 3 */
+ 11976 "01010001" // /* MW 2 */
+ 11977 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 11978 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11979 "00010000" // /* MW 9 */
+ 11980 "00100000" // /* MW 8 */
+ 11981 "10110010" // /* MW 7 */
+ 11982 "11110000" // /* MW 6 */
+ 11983 "00000001" // /* MW 5 */
+ 11984 "00000000" // /* MW 4 */
+ 11985 "11010000" // /* MW 3 */
+ 11986 "11001110" // /* MW 2 */
+ 11987 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 11988 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "01010110" // /* MW 3 */
+ 11990 "00000110" // /* MW 2 */
+ 11991 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 11992 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "00110110" // /* MW 3 */
+ 11994 "00000110" // /* MW 2 */
+ 11995 "00000001" // /* MW 1 */
+ 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11997 "00000000" // /* MW 1 */
+ 11998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11999 "00000000" // /* MW 1 */
+ 12000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12001 "00000000" // /* MW 1 */
+ 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12003 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12004 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "00110001" // /* MW 3 */
+ 12006 "00100001" // /* MW 2 */
+ 12007 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12008 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12009 "00010001" // /* MW 3 */
+ 12010 "11100110" // /* MW 2 */
+ 12011 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12012 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12013 "00101000" // /* MW 3 */
+ 12014 "01100001" // /* MW 2 */
+ 12015 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12016 "10000100" // JNZ r16, #12048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12048 delay_slots=5 */
+ 12017 "00000001" // /* MW 5 */
+ 12018 "01000000" // /* MW 4 */
+ 12019 "10001000" // /* MW 3 */
+ 12020 "00010111" // /* MW 2 */
+ 12021 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12031 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12032 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12033 "00000001" // /* MW 3 */
+ 12034 "00100000" // /* MW 2 */
+ 12035 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12036 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12037 "11000001" // /* MW 11 */
+ 12038 "00001000" // /* MW 10 */
+ 12039 "10000011" // /* MW 9 */
+ 12040 "00000000" // /* MW 8 */
+ 12041 "00000000" // /* MW 7 */
+ 12042 "00000000" // /* MW 6 */
+ 12043 "00100000" // /* MW 5 */
+ 12044 "00000000" // /* MW 4 */
+ 12045 "11110000" // /* MW 3 */
+ 12046 "00101100" // /* MW 2 */
+ 12047 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12048 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12049 "00111001" // /* MW 3 */
+ 12050 "11110000" // /* MW 2 */
+ 12051 "00000111" // /* MW 1 */
+ 12052 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "11110001" // /* MW 3 */
+ 12054 "11111101" // /* MW 2 */
+ 12055 "00000111" // /* MW 1 */
+ 12056 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "10011001" // /* MW 3 */
+ 12058 "11110111" // /* MW 2 */
+ 12059 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12062 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12063 "11010001" // /* MW 3 */
+ 12064 "11111001" // /* MW 2 */
+ 12065 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12069 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12070 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12071 "00000000" // /* MW 3 */
+ 12072 "00101000" // /* MW 2 */
+ 12073 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12074 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12075 "00001011" // /* MW 3 */
+ 12076 "10001110" // /* MW 2 */
+ 12077 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12078 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12079 "00000001" // /* MW 5 */
+ 12080 "00000000" // /* MW 4 */
+ 12081 "00000000" // /* MW 3 */
+ 12082 "11111000" // /* MW 2 */
+ 12083 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12087 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12089 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12096 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12097 "00010000" // /* MW 9 */
+ 12098 "11000000" // /* MW 8 */
+ 12099 "10110011" // /* MW 7 */
+ 12100 "11110000" // /* MW 6 */
+ 12101 "00000001" // /* MW 5 */
+ 12102 "00000000" // /* MW 4 */
+ 12103 "11010000" // /* MW 3 */
+ 12104 "10000101" // /* MW 2 */
+ 12105 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12106 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12107 "01011000" // /* MW 9 */
+ 12108 "00000000" // /* MW 8 */
+ 12109 "00001000" // /* MW 7 */
+ 12110 "01001011" // /* MW 6 */
+ 12111 "00000000" // /* MW 5 */
+ 12112 "00000001" // /* MW 4 */
+ 12113 "11010000" // /* MW 3 */
+ 12114 "10000001" // /* MW 2 */
+ 12115 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12117 "00000001" // /* MW 5 */
+ 12118 "00000000" // /* MW 4 */
+ 12119 "00000000" // /* MW 3 */
+ 12120 "00001000" // /* MW 2 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12122 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12123 "00010001" // /* MW 9 */
+ 12124 "11000000" // /* MW 8 */
+ 12125 "10110011" // /* MW 7 */
+ 12126 "11110011" // /* MW 6 */
+ 12127 "00000001" // /* MW 5 */
+ 12128 "00000000" // /* MW 4 */
+ 12129 "10110000" // /* MW 3 */
+ 12130 "11110011" // /* MW 2 */
+ 12131 "11111110" // /* MW 1 */
+ 12132 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00111101" // /* MW 3 */
+ 12134 "11111100" // /* MW 2 */
+ 12135 "00001111" // /* MW 1 */
+ 12136 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "11110101" // /* MW 3 */
+ 12138 "11111001" // /* MW 2 */
+ 12139 "00001111" // /* MW 1 */
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12142 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12143 "00101001" // /* MW 3 */
+ 12144 "00011100" // /* MW 2 */
+ 12145 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12146 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12147 "00001001" // /* MW 3 */
+ 12148 "00011100" // /* MW 2 */
+ 12149 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12150 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12151 "00101110" // /* MW 3 */
+ 12152 "00000100" // /* MW 2 */
+ 12153 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12154 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12155 "00001110" // /* MW 3 */
+ 12156 "00010100" // /* MW 2 */
+ 12157 "00000000" // /* MW 1 */
+ 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12159 "00000000" // /* MW 1 */
+ 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12161 "00000000" // /* MW 1 */
+ 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12163 "00000000" // /* MW 1 */
+ 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12165 "00000000" // /* MW 1 */
+ 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12167 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12168 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12169 "00101001" // /* MW 3 */
+ 12170 "00000100" // /* MW 2 */
+ 12171 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12172 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12173 "00001001" // /* MW 3 */
+ 12174 "00010100" // /* MW 2 */
+ 12175 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12176 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12177 "00101010" // /* MW 3 */
+ 12178 "01011110" // /* MW 2 */
+ 12179 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12180 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12181 "01001010" // /* MW 3 */
+ 12182 "11101110" // /* MW 2 */
+ 12183 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12184 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12185 "00101010" // /* MW 3 */
+ 12186 "11101100" // /* MW 2 */
+ 12187 "00000111" // /* MW 1 */
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+ 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12193 "00000000" // /* MW 1 */
+ 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12195 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12196 "00000100" // JL #14224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */
+ 12197 "00000001" // /* MW 5 */
+ 12198 "00000000" // /* MW 4 */
+ 12199 "11001000" // /* MW 3 */
+ 12200 "00011011" // /* MW 2 */
+ 12201 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12202 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12203 "01000011" // /* MW 5 */
+ 12204 "10111110" // /* MW 4 */
+ 12205 "10111000" // /* MW 3 */
+ 12206 "11001010" // /* MW 2 */
+ 12207 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12208 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12209 "00010001" // /* MW 5 */
+ 12210 "11000010" // /* MW 4 */
+ 12211 "10110000" // /* MW 3 */
+ 12212 "10000110" // /* MW 2 */
+ 12213 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12214 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12215 "00010101" // /* MW 5 */
+ 12216 "11101111" // /* MW 4 */
+ 12217 "10110111" // /* MW 3 */
+ 12218 "01000010" // /* MW 2 */
+ 12219 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12220 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12221 "11110001" // /* MW 3 */
+ 12222 "00100010" // /* MW 2 */
+ 12223 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12224 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12225 "00000000" // /* MW 15 */
+ 12226 "00000000" // /* MW 14 */
+ 12227 "01111000" // /* MW 13 */
+ 12228 "10100101" // /* MW 12 */
+ 12229 "00000001" // /* MW 11 */
+ 12230 "10010000" // /* MW 10 */
+ 12231 "00001000" // /* MW 9 */
+ 12232 "00011110" // /* MW 8 */
+ 12233 "01011011" // /* MW 7 */
+ 12234 "00000001" // /* MW 6 */
+ 12235 "00100000" // /* MW 5 */
+ 12236 "00000000" // /* MW 4 */
+ 12237 "11110000" // /* MW 3 */
+ 12238 "00101100" // /* MW 2 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12240 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12241 "00000010" // /* MW 5 */
+ 12242 "01000000" // /* MW 4 */
+ 12243 "00100000" // /* MW 3 */
+ 12244 "11010010" // /* MW 2 */
+ 12245 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12246 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12247 "01000011" // /* MW 5 */
+ 12248 "01001000" // /* MW 4 */
+ 12249 "01011000" // /* MW 3 */
+ 12250 "11000101" // /* MW 2 */
+ 12251 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12252 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12253 "01101010" // /* MW 3 */
+ 12254 "11101110" // /* MW 2 */
+ 12255 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12256 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00110001" // /* MW 3 */
+ 12258 "11101100" // /* MW 2 */
+ 12259 "00000111" // /* MW 1 */
+ 12260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12261 "00000000" // /* MW 1 */
+ 12262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12263 "00000000" // /* MW 1 */
+ 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12265 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12266 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12267 "01000110" // /* MW 3 */
+ 12268 "11101001" // /* MW 2 */
+ 12269 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12270 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12271 "00001010" // /* MW 3 */
+ 12272 "00110111" // /* MW 2 */
+ 12273 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12274 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12275 "01100011" // /* MW 5 */
+ 12276 "11000110" // /* MW 4 */
+ 12277 "10111000" // /* MW 3 */
+ 12278 "01001110" // /* MW 2 */
+ 12279 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12280 "00111010" // ST r17, [sp, #-32]; JL #14224 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */
+ 12281 "01000001" // /* MW 9 */
+ 12282 "00000000" // /* MW 8 */
+ 12283 "00000000" // /* MW 7 */
+ 12284 "11110010" // /* MW 6 */
+ 12285 "00000110" // /* MW 5 */
+ 12286 "00000000" // /* MW 4 */
+ 12287 "10110000" // /* MW 3 */
+ 12288 "01000110" // /* MW 2 */
+ 12289 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12290 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12291 "00100010" // /* MW 3 */
+ 12292 "10101001" // /* MW 2 */
+ 12293 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12294 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12295 "00001010" // /* MW 3 */
+ 12296 "01110111" // /* MW 2 */
+ 12297 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12298 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12299 "00010001" // /* MW 3 */
+ 12300 "00100101" // /* MW 2 */
+ 12301 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12302 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12303 "01110000" // /* MW 3 */
+ 12304 "00100110" // /* MW 2 */
+ 12305 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12306 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12307 "01100000" // /* MW 13 */
+ 12308 "00101011" // /* MW 12 */
+ 12309 "00000000" // /* MW 11 */
+ 12310 "00001001" // /* MW 10 */
+ 12311 "10011000" // /* MW 9 */
+ 12312 "00111101" // /* MW 8 */
+ 12313 "00100010" // /* MW 7 */
+ 12314 "01000001" // /* MW 6 */
+ 12315 "00100100" // /* MW 5 */
+ 12316 "00000000" // /* MW 4 */
+ 12317 "11110000" // /* MW 3 */
+ 12318 "00101100" // /* MW 2 */
+ 12319 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12320 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12321 "01011000" // /* MW 9 */
+ 12322 "01000010" // /* MW 8 */
+ 12323 "00000000" // /* MW 7 */
+ 12324 "11001000" // /* MW 6 */
+ 12325 "00110111" // /* MW 5 */
+ 12326 "00111111" // /* MW 4 */
+ 12327 "00100000" // /* MW 3 */
+ 12328 "00001110" // /* MW 2 */
+ 12329 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12330 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12331 "01011000" // /* MW 9 */
+ 12332 "11111100" // /* MW 8 */
+ 12333 "00101001" // /* MW 7 */
+ 12334 "00001000" // /* MW 6 */
+ 12335 "10000000" // /* MW 5 */
+ 12336 "00000001" // /* MW 4 */
+ 12337 "00100000" // /* MW 3 */
+ 12338 "11000010" // /* MW 2 */
+ 12339 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12340 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12341 "01011000" // /* MW 9 */
+ 12342 "00000010" // /* MW 8 */
+ 12343 "10001000" // /* MW 7 */
+ 12344 "10001000" // /* MW 6 */
+ 12345 "01100000" // /* MW 5 */
+ 12346 "00000000" // /* MW 4 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "11011010" // /* MW 2 */
+ 12349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12350 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12351 "01011000" // /* MW 9 */
+ 12352 "00010111" // /* MW 8 */
+ 12353 "10001000" // /* MW 7 */
+ 12354 "00001011" // /* MW 6 */
+ 12355 "01010001" // /* MW 5 */
+ 12356 "00000000" // /* MW 4 */
+ 12357 "01010000" // /* MW 3 */
+ 12358 "01000101" // /* MW 2 */
+ 12359 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12360 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12361 "01011000" // /* MW 9 */
+ 12362 "00100000" // /* MW 8 */
+ 12363 "10000000" // /* MW 7 */
+ 12364 "01001000" // /* MW 6 */
+ 12365 "00100111" // /* MW 5 */
+ 12366 "00111111" // /* MW 4 */
+ 12367 "00100000" // /* MW 3 */
+ 12368 "01010110" // /* MW 2 */
+ 12369 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12370 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12371 "01011000" // /* MW 9 */
+ 12372 "00000001" // /* MW 8 */
+ 12373 "01001000" // /* MW 7 */
+ 12374 "11001011" // /* MW 6 */
+ 12375 "01110000" // /* MW 5 */
+ 12376 "00000001" // /* MW 4 */
+ 12377 "00100000" // /* MW 3 */
+ 12378 "01111010" // /* MW 2 */
+ 12379 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12380 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12381 "01011000" // /* MW 9 */
+ 12382 "11000000" // /* MW 8 */
+ 12383 "11101111" // /* MW 7 */
+ 12384 "00001011" // /* MW 6 */
+ 12385 "11010000" // /* MW 5 */
+ 12386 "00000101" // /* MW 4 */
+ 12387 "10000000" // /* MW 3 */
+ 12388 "11000000" // /* MW 2 */
+ 12389 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12390 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12391 "00100001" // /* MW 3 */
+ 12392 "00101000" // /* MW 2 */
+ 12393 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12394 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12395 "00000110" // /* MW 3 */
+ 12396 "11000111" // /* MW 2 */
+ 12397 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12398 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12399 "00000010" // /* MW 5 */
+ 12400 "00110110" // /* MW 4 */
+ 12401 "01010000" // /* MW 3 */
+ 12402 "11110001" // /* MW 2 */
+ 12403 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12404 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12405 "11110101" // /* MW 5 */
+ 12406 "00111111" // /* MW 4 */
+ 12407 "01001011" // /* MW 3 */
+ 12408 "00101000" // /* MW 2 */
+ 12409 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12410 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12411 "00011101" // /* MW 5 */
+ 12412 "00100000" // /* MW 4 */
+ 12413 "11110001" // /* MW 3 */
+ 12414 "11100001" // /* MW 2 */
+ 12415 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12416 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "01110000" // /* MW 3 */
+ 12418 "00101000" // /* MW 2 */
+ 12419 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12420 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12421 "00000001" // /* MW 5 */
+ 12422 "10100000" // /* MW 4 */
+ 12423 "10010000" // /* MW 3 */
+ 12424 "00000000" // /* MW 2 */
+ 12425 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12426 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12427 "00000001" // /* MW 5 */
+ 12428 "10110100" // /* MW 4 */
+ 12429 "10111101" // /* MW 3 */
+ 12430 "11100111" // /* MW 2 */
+ 12431 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12432 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12433 "00000010" // /* MW 5 */
+ 12434 "10100011" // /* MW 4 */
+ 12435 "10110000" // /* MW 3 */
+ 12436 "00001101" // /* MW 2 */
+ 12437 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12438 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12439 "11111111" // /* MW 5 */
+ 12440 "00110101" // /* MW 4 */
+ 12441 "10110000" // /* MW 3 */
+ 12442 "11001101" // /* MW 2 */
+ 12443 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12444 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12445 "00001111" // /* MW 3 */
+ 12446 "11001101" // /* MW 2 */
+ 12447 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12448 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12449 "00011111" // /* MW 3 */
+ 12450 "11011111" // /* MW 2 */
+ 12451 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12452 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12453 "11111111" // /* MW 5 */
+ 12454 "10110011" // /* MW 4 */
+ 12455 "11111001" // /* MW 3 */
+ 12456 "01101011" // /* MW 2 */
+ 12457 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12458 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12459 "00000111" // /* MW 3 */
+ 12460 "00110111" // /* MW 2 */
+ 12461 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12462 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12463 "11011111" // /* MW 5 */
+ 12464 "10010000" // /* MW 4 */
+ 12465 "00110111" // /* MW 3 */
+ 12466 "11010110" // /* MW 2 */
+ 12467 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12468 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12469 "01010010" // /* MW 3 */
+ 12470 "00111000" // /* MW 2 */
+ 12471 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12472 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12473 "00101101" // /* MW 3 */
+ 12474 "00100101" // /* MW 2 */
+ 12475 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12476 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12477 "00111111" // /* MW 5 */
+ 12478 "11001000" // /* MW 4 */
+ 12479 "00111000" // /* MW 3 */
+ 12480 "01001010" // /* MW 2 */
+ 12481 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12482 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12483 "11111011" // /* MW 5 */
+ 12484 "01110010" // /* MW 4 */
+ 12485 "00111111" // /* MW 3 */
+ 12486 "11110010" // /* MW 2 */
+ 12487 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12488 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12489 "00011111" // /* MW 5 */
+ 12490 "01110000" // /* MW 4 */
+ 12491 "00111001" // /* MW 3 */
+ 12492 "11110010" // /* MW 2 */
+ 12493 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12494 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12495 "11111011" // /* MW 5 */
+ 12496 "11001110" // /* MW 4 */
+ 12497 "00111001" // /* MW 3 */
+ 12498 "11001110" // /* MW 2 */
+ 12499 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12500 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12501 "11101010" // /* MW 5 */
+ 12502 "10110011" // /* MW 4 */
+ 12503 "10111001" // /* MW 3 */
+ 12504 "00110101" // /* MW 2 */
+ 12505 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12506 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12507 "01011011" // /* MW 5 */
+ 12508 "01111011" // /* MW 4 */
+ 12509 "00111001" // /* MW 3 */
+ 12510 "11111110" // /* MW 2 */
+ 12511 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12512 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12513 "11100010" // /* MW 5 */
+ 12514 "00110011" // /* MW 4 */
+ 12515 "11111001" // /* MW 3 */
+ 12516 "00100001" // /* MW 2 */
+ 12517 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12518 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12519 "00000100" // /* MW 5 */
+ 12520 "11110011" // /* MW 4 */
+ 12521 "00111111" // /* MW 3 */
+ 12522 "10000010" // /* MW 2 */
+ 12523 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12524 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12525 "01101101" // /* MW 3 */
+ 12526 "11111111" // /* MW 2 */
+ 12527 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12528 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12529 "11111111" // /* MW 5 */
+ 12530 "10111111" // /* MW 4 */
+ 12531 "00111001" // /* MW 3 */
+ 12532 "01100110" // /* MW 2 */
+ 12533 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12534 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12535 "11011011" // /* MW 5 */
+ 12536 "11000110" // /* MW 4 */
+ 12537 "00111000" // /* MW 3 */
+ 12538 "10000110" // /* MW 2 */
+ 12539 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12540 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12541 "11111111" // /* MW 5 */
+ 12542 "00110001" // /* MW 4 */
+ 12543 "00111001" // /* MW 3 */
+ 12544 "10100100" // /* MW 2 */
+ 12545 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12546 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12547 "11000011" // /* MW 5 */
+ 12548 "11011011" // /* MW 4 */
+ 12549 "00110011" // /* MW 3 */
+ 12550 "11011010" // /* MW 2 */
+ 12551 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12552 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12553 "01011011" // /* MW 5 */
+ 12554 "01000011" // /* MW 4 */
+ 12555 "00111000" // /* MW 3 */
+ 12556 "11001010" // /* MW 2 */
+ 12557 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12558 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12559 "01011011" // /* MW 5 */
+ 12560 "11111100" // /* MW 4 */
+ 12561 "00111001" // /* MW 3 */
+ 12562 "10011110" // /* MW 2 */
+ 12563 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12564 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12565 "11000001" // /* MW 5 */
+ 12566 "11011010" // /* MW 4 */
+ 12567 "00111110" // /* MW 3 */
+ 12568 "11001110" // /* MW 2 */
+ 12569 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12570 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12571 "11110010" // /* MW 5 */
+ 12572 "10111111" // /* MW 4 */
+ 12573 "00011110" // /* MW 3 */
+ 12574 "00100000" // /* MW 2 */
+ 12575 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12576 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12577 "10100011" // /* MW 5 */
+ 12578 "01000011" // /* MW 4 */
+ 12579 "00111000" // /* MW 3 */
+ 12580 "11011010" // /* MW 2 */
+ 12581 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12582 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12583 "01011001" // /* MW 9 */
+ 12584 "11111111" // /* MW 8 */
+ 12585 "00001111" // /* MW 7 */
+ 12586 "01101110" // /* MW 6 */
+ 12587 "01101101" // /* MW 5 */
+ 12588 "00011111" // /* MW 4 */
+ 12589 "00110000" // /* MW 3 */
+ 12590 "11000010" // /* MW 2 */
+ 12591 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12592 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12593 "10000001" // /* MW 5 */
+ 12594 "01101010" // /* MW 4 */
+ 12595 "00111110" // /* MW 3 */
+ 12596 "11001010" // /* MW 2 */
+ 12597 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12598 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12599 "11000011" // /* MW 5 */
+ 12600 "01010010" // /* MW 4 */
+ 12601 "00111010" // /* MW 3 */
+ 12602 "11101010" // /* MW 2 */
+ 12603 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12604 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12605 "00001000" // /* MW 11 */
+ 12606 "00010000" // /* MW 10 */
+ 12607 "01101101" // /* MW 9 */
+ 12608 "10110010" // /* MW 8 */
+ 12609 "00001000" // /* MW 7 */
+ 12610 "10101011" // /* MW 6 */
+ 12611 "01110001" // /* MW 5 */
+ 12612 "00011110" // /* MW 4 */
+ 12613 "00000111" // /* MW 3 */
+ 12614 "00010001" // /* MW 2 */
+ 12615 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12616 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12617 "01110001" // /* MW 3 */
+ 12618 "00011110" // /* MW 2 */
+ 12619 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12620 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12621 "11111011" // /* MW 5 */
+ 12622 "01010010" // /* MW 4 */
+ 12623 "00111000" // /* MW 3 */
+ 12624 "11000110" // /* MW 2 */
+ 12625 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12626 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12627 "10000011" // /* MW 5 */
+ 12628 "01000010" // /* MW 4 */
+ 12629 "00111100" // /* MW 3 */
+ 12630 "11000010" // /* MW 2 */
+ 12631 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12632 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12633 "11111011" // /* MW 5 */
+ 12634 "01010010" // /* MW 4 */
+ 12635 "00111001" // /* MW 3 */
+ 12636 "11000110" // /* MW 2 */
+ 12637 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12638 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12639 "10000011" // /* MW 5 */
+ 12640 "01000010" // /* MW 4 */
+ 12641 "00111100" // /* MW 3 */
+ 12642 "11000010" // /* MW 2 */
+ 12643 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12644 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12645 "01010001" // /* MW 3 */
+ 12646 "00011110" // /* MW 2 */
+ 12647 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12648 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12649 "00110001" // /* MW 3 */
+ 12650 "00011110" // /* MW 2 */
+ 12651 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12652 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12653 "00010001" // /* MW 3 */
+ 12654 "00001010" // /* MW 2 */
+ 12655 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12656 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12657 "00001010" // /* MW 3 */
+ 12658 "00000110" // /* MW 2 */
+ 12659 "00000111" // /* MW 1 */
+ 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12661 "00000000" // /* MW 1 */
+ 12662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12663 "00000000" // /* MW 1 */
+ 12664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12665 "00000000" // /* MW 1 */
+ 12666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12667 "00000000" // /* MW 1 */
+ 12668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12669 "00000000" // /* MW 1 */
+ 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12671 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12672 "10000100" // JZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */
+ 12673 "00000001" // /* MW 5 */
+ 12674 "00000000" // /* MW 4 */
+ 12675 "11010000" // /* MW 3 */
+ 12676 "00011000" // /* MW 2 */
+ 12677 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12678 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12679 "01100000" // /* MW 3 */
+ 12680 "00111011" // /* MW 2 */
+ 12681 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12682 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000000" // /* MW 5 */
+ 12684 "10100000" // /* MW 4 */
+ 12685 "00001001" // /* MW 3 */
+ 12686 "01111111" // /* MW 2 */
+ 12687 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12693 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12694 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12695 "00000001" // /* MW 9 */
+ 12696 "00100110" // /* MW 8 */
+ 12697 "00000000" // /* MW 7 */
+ 12698 "00000000" // /* MW 6 */
+ 12699 "01011011" // /* MW 5 */
+ 12700 "00000001" // /* MW 4 */
+ 12701 "11110000" // /* MW 3 */
+ 12702 "00101100" // /* MW 2 */
+ 12703 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12704 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12705 "00010000" // /* MW 9 */
+ 12706 "00110100" // /* MW 8 */
+ 12707 "00110010" // /* MW 7 */
+ 12708 "11110000" // /* MW 6 */
+ 12709 "00000001" // /* MW 5 */
+ 12710 "00000000" // /* MW 4 */
+ 12711 "00100000" // /* MW 3 */
+ 12712 "10000111" // /* MW 2 */
+ 12713 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12714 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12715 "11100010" // /* MW 5 */
+ 12716 "00000100" // /* MW 4 */
+ 12717 "01010000" // /* MW 3 */
+ 12718 "11000000" // /* MW 2 */
+ 12719 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12720 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12721 "11101001" // /* MW 5 */
+ 12722 "00000010" // /* MW 4 */
+ 12723 "00100001" // /* MW 3 */
+ 12724 "10000011" // /* MW 2 */
+ 12725 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12726 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12727 "00100101" // /* MW 5 */
+ 12728 "00000001" // /* MW 4 */
+ 12729 "00100000" // /* MW 3 */
+ 12730 "00111110" // /* MW 2 */
+ 12731 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12732 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00000001" // /* MW 5 */
+ 12734 "00000000" // /* MW 4 */
+ 12735 "00000000" // /* MW 3 */
+ 12736 "11111000" // /* MW 2 */
+ 12737 "11111111" // /* MW 1 */
+ 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12739 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12741 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12742 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12743 "00010111" // /* MW 3 */
+ 12744 "00000010" // /* MW 2 */
+ 12745 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12746 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12747 "01000001" // /* MW 5 */
+ 12748 "01110000" // /* MW 4 */
+ 12749 "00001111" // /* MW 3 */
+ 12750 "00000000" // /* MW 2 */
+ 12751 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12752 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12753 "00010110" // /* MW 3 */
+ 12754 "01000000" // /* MW 2 */
+ 12755 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 12756 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12757 "11000000" // /* MW 3 */
+ 12758 "01100000" // /* MW 2 */
+ 12759 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12760 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12761 "00000001" // /* MW 3 */
+ 12762 "00000001" // /* MW 2 */
+ 12763 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 12767 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 12768 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12769 "11000000" // /* MW 3 */
+ 12770 "01010110" // /* MW 2 */
+ 12771 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 12772 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12773 "10101001" // /* MW 5 */
+ 12774 "00000001" // /* MW 4 */
+ 12775 "11011110" // /* MW 3 */
+ 12776 "10010011" // /* MW 2 */
+ 12777 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12778 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12779 "00000010" // /* MW 5 */
+ 12780 "11010001" // /* MW 4 */
+ 12781 "11010110" // /* MW 3 */
+ 12782 "10000011" // /* MW 2 */
+ 12783 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 12784 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12785 "10001010" // /* MW 3 */
+ 12786 "11101000" // /* MW 2 */
+ 12787 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 12788 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12789 "01000110" // /* MW 3 */
+ 12790 "11111101" // /* MW 2 */
+ 12791 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12792 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12793 "00100110" // /* MW 3 */
+ 12794 "00111101" // /* MW 2 */
+ 12795 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12796 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12797 "01000110" // /* MW 3 */
+ 12798 "11111111" // /* MW 2 */
+ 12799 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12800 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12801 "00100110" // /* MW 3 */
+ 12802 "00101111" // /* MW 2 */
+ 12803 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 12804 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12805 "00000110" // /* MW 3 */
+ 12806 "00101101" // /* MW 2 */
+ 12807 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 12808 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12809 "01000110" // /* MW 3 */
+ 12810 "11111100" // /* MW 2 */
+ 12811 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12812 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12813 "00100110" // /* MW 3 */
+ 12814 "00111100" // /* MW 2 */
+ 12815 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12816 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12817 "01000110" // /* MW 3 */
+ 12818 "11111110" // /* MW 2 */
+ 12819 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12820 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12821 "00100110" // /* MW 3 */
+ 12822 "00101110" // /* MW 2 */
+ 12823 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 12824 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12825 "00000110" // /* MW 3 */
+ 12826 "00101100" // /* MW 2 */
+ 12827 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 12828 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12829 "11000110" // /* MW 3 */
+ 12830 "11111100" // /* MW 2 */
+ 12831 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12832 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12833 "10100110" // /* MW 3 */
+ 12834 "00111100" // /* MW 2 */
+ 12835 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12836 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12837 "11000110" // /* MW 3 */
+ 12838 "11111110" // /* MW 2 */
+ 12839 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12840 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12841 "10100110" // /* MW 3 */
+ 12842 "00101110" // /* MW 2 */
+ 12843 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 12844 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12845 "10000110" // /* MW 3 */
+ 12846 "00101100" // /* MW 2 */
+ 12847 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 12848 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12849 "11000110" // /* MW 3 */
+ 12850 "11111111" // /* MW 2 */
+ 12851 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 12852 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12853 "10100110" // /* MW 3 */
+ 12854 "00101111" // /* MW 2 */
+ 12855 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12856 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12857 "00010000" // /* MW 9 */
+ 12858 "00110100" // /* MW 8 */
+ 12859 "00110010" // /* MW 7 */
+ 12860 "11110010" // /* MW 6 */
+ 12861 "00000001" // /* MW 5 */
+ 12862 "00000000" // /* MW 4 */
+ 12863 "11010000" // /* MW 3 */
+ 12864 "11110000" // /* MW 2 */
+ 12865 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 12866 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12867 "10000001" // /* MW 5 */
+ 12868 "11000101" // /* MW 4 */
+ 12869 "01011000" // /* MW 3 */
+ 12870 "10011000" // /* MW 2 */
+ 12871 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12872 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "00010000" // /* MW 3 */
+ 12874 "00001111" // /* MW 2 */
+ 12875 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 12876 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12877 "01011000" // /* MW 11 */
+ 12878 "00000000" // /* MW 10 */
+ 12879 "01100000" // /* MW 9 */
+ 12880 "01101010" // /* MW 8 */
+ 12881 "00100000" // /* MW 7 */
+ 12882 "00000000" // /* MW 6 */
+ 12883 "01101000" // /* MW 5 */
+ 12884 "00111011" // /* MW 4 */
+ 12885 "01110000" // /* MW 3 */
+ 12886 "10000101" // /* MW 2 */
+ 12887 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12888 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12889 "01100000" // /* MW 13 */
+ 12890 "00001001" // /* MW 12 */
+ 12891 "01100010" // /* MW 11 */
+ 12892 "00001011" // /* MW 10 */
+ 12893 "00010000" // /* MW 9 */
+ 12894 "11100000" // /* MW 8 */
+ 12895 "00101101" // /* MW 7 */
+ 12896 "00000100" // /* MW 6 */
+ 12897 "11101001" // /* MW 5 */
+ 12898 "00111000" // /* MW 4 */
+ 12899 "11010000" // /* MW 3 */
+ 12900 "10111000" // /* MW 2 */
+ 12901 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12902 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12903 "01110010" // /* MW 9 */
+ 12904 "10010000" // /* MW 8 */
+ 12905 "10000000" // /* MW 7 */
+ 12906 "00000010" // /* MW 6 */
+ 12907 "01001011" // /* MW 5 */
+ 12908 "00001100" // /* MW 4 */
+ 12909 "11010001" // /* MW 3 */
+ 12910 "10110100" // /* MW 2 */
+ 12911 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 12912 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12913 "01111110" // /* MW 9 */
+ 12914 "11000000" // /* MW 8 */
+ 12915 "11100001" // /* MW 7 */
+ 12916 "00000011" // /* MW 6 */
+ 12917 "10010000" // /* MW 5 */
+ 12918 "10101011" // /* MW 4 */
+ 12919 "11010001" // /* MW 3 */
+ 12920 "00110000" // /* MW 2 */
+ 12921 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 12922 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12923 "01011110" // /* MW 9 */
+ 12924 "10010000" // /* MW 8 */
+ 12925 "00000111" // /* MW 7 */
+ 12926 "00000010" // /* MW 6 */
+ 12927 "11110100" // /* MW 5 */
+ 12928 "11110000" // /* MW 4 */
+ 12929 "11010001" // /* MW 3 */
+ 12930 "00001010" // /* MW 2 */
+ 12931 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 12932 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12933 "10000010" // /* MW 5 */
+ 12934 "00000000" // /* MW 4 */
+ 12935 "01010000" // /* MW 3 */
+ 12936 "00011110" // /* MW 2 */
+ 12937 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 12938 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12939 "00010000" // /* MW 11 */
+ 12940 "01111000" // /* MW 10 */
+ 12941 "01111001" // /* MW 9 */
+ 12942 "00001100" // /* MW 8 */
+ 12943 "00000000" // /* MW 7 */
+ 12944 "00000000" // /* MW 6 */
+ 12945 "01001011" // /* MW 5 */
+ 12946 "00010000" // /* MW 4 */
+ 12947 "11010110" // /* MW 3 */
+ 12948 "11000000" // /* MW 2 */
+ 12949 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 12950 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12951 "00010000" // /* MW 11 */
+ 12952 "10101000" // /* MW 10 */
+ 12953 "10111001" // /* MW 9 */
+ 12954 "00001101" // /* MW 8 */
+ 12955 "00000000" // /* MW 7 */
+ 12956 "00000000" // /* MW 6 */
+ 12957 "01001011" // /* MW 5 */
+ 12958 "00010000" // /* MW 4 */
+ 12959 "11010010" // /* MW 3 */
+ 12960 "10010010" // /* MW 2 */
+ 12961 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 12962 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12963 "00000101" // /* MW 5 */
+ 12964 "01100001" // /* MW 4 */
+ 12965 "10000100" // /* MW 3 */
+ 12966 "00010110" // /* MW 2 */
+ 12967 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 12968 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12969 "10001010" // /* MW 3 */
+ 12970 "00000000" // /* MW 2 */
+ 12971 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 12972 "10111010" // LDA r5, [p3]; MOVXM p3, #13200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12973 "00010000" // /* MW 9 */
+ 12974 "11001000" // /* MW 8 */
+ 12975 "10110001" // /* MW 7 */
+ 12976 "00001101" // /* MW 6 */
+ 12977 "00000000" // /* MW 5 */
+ 12978 "00000000" // /* MW 4 */
+ 12979 "11010000" // /* MW 3 */
+ 12980 "10010110" // /* MW 2 */
+ 12981 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 12982 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12983 "10101000" // /* MW 9 */
+ 12984 "00000001" // /* MW 8 */
+ 12985 "10001110" // /* MW 7 */
+ 12986 "00001010" // /* MW 6 */
+ 12987 "00010100" // /* MW 5 */
+ 12988 "00000000" // /* MW 4 */
+ 12989 "11110000" // /* MW 3 */
+ 12990 "00101100" // /* MW 2 */
+ 12991 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 12992 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12993 "00000000" // /* MW 15 */
+ 12994 "00000000" // /* MW 14 */
+ 12995 "01111000" // /* MW 13 */
+ 12996 "10111001" // /* MW 12 */
+ 12997 "00001110" // /* MW 11 */
+ 12998 "00001000" // /* MW 10 */
+ 12999 "00110110" // /* MW 9 */
+ 13000 "00000000" // /* MW 8 */
+ 13001 "01011011" // /* MW 7 */
+ 13002 "00000001" // /* MW 6 */
+ 13003 "00100000" // /* MW 5 */
+ 13004 "00000000" // /* MW 4 */
+ 13005 "00000000" // /* MW 3 */
+ 13006 "10010001" // /* MW 2 */
+ 13007 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13008 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13009 "01101010" // /* MW 15 */
+ 13010 "01100011" // /* MW 14 */
+ 13011 "10101100" // /* MW 13 */
+ 13012 "00000011" // /* MW 12 */
+ 13013 "00001110" // /* MW 11 */
+ 13014 "00000010" // /* MW 10 */
+ 13015 "11010100" // /* MW 9 */
+ 13016 "00001101" // /* MW 8 */
+ 13017 "01001011" // /* MW 7 */
+ 13018 "00010000" // /* MW 6 */
+ 13019 "00100000" // /* MW 5 */
+ 13020 "00000000" // /* MW 4 */
+ 13021 "11110000" // /* MW 3 */
+ 13022 "00101100" // /* MW 2 */
+ 13023 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13024 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13025 "00011010" // /* MW 15 */
+ 13026 "01001000" // /* MW 14 */
+ 13027 "11001100" // /* MW 13 */
+ 13028 "00111111" // /* MW 12 */
+ 13029 "10111001" // /* MW 11 */
+ 13030 "11011010" // /* MW 10 */
+ 13031 "00101111" // /* MW 9 */
+ 13032 "00000100" // /* MW 8 */
+ 13033 "01001011" // /* MW 7 */
+ 13034 "00010000" // /* MW 6 */
+ 13035 "00100101" // /* MW 5 */
+ 13036 "00000000" // /* MW 4 */
+ 13037 "11010000" // /* MW 3 */
+ 13038 "10100011" // /* MW 2 */
+ 13039 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13040 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13041 "01101110" // /* MW 9 */
+ 13042 "10000001" // /* MW 8 */
+ 13043 "10000100" // /* MW 7 */
+ 13044 "00000010" // /* MW 6 */
+ 13045 "11110100" // /* MW 5 */
+ 13046 "11110000" // /* MW 4 */
+ 13047 "01110001" // /* MW 3 */
+ 13048 "10110011" // /* MW 2 */
+ 13049 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13050 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13051 "00000001" // /* MW 9 */
+ 13052 "10001001" // /* MW 8 */
+ 13053 "10001010" // /* MW 7 */
+ 13054 "01000110" // /* MW 6 */
+ 13055 "00001011" // /* MW 5 */
+ 13056 "10011100" // /* MW 4 */
+ 13057 "11101010" // /* MW 3 */
+ 13058 "00111000" // /* MW 2 */
+ 13059 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13060 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13061 "00000001" // /* MW 9 */
+ 13062 "00110101" // /* MW 8 */
+ 13063 "10001001" // /* MW 7 */
+ 13064 "11000110" // /* MW 6 */
+ 13065 "10000110" // /* MW 5 */
+ 13066 "00110000" // /* MW 4 */
+ 13067 "01101010" // /* MW 3 */
+ 13068 "10110001" // /* MW 2 */
+ 13069 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13070 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13071 "00000110" // /* MW 3 */
+ 13072 "10001001" // /* MW 2 */
+ 13073 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13074 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13075 "10100001" // /* MW 7 */
+ 13076 "01001000" // /* MW 6 */
+ 13077 "10001100" // /* MW 5 */
+ 13078 "11000110" // /* MW 4 */
+ 13079 "10001110" // /* MW 3 */
+ 13080 "10110000" // /* MW 2 */
+ 13081 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13082 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13083 "10100001" // /* MW 7 */
+ 13084 "00110110" // /* MW 6 */
+ 13085 "10001010" // /* MW 5 */
+ 13086 "01000110" // /* MW 4 */
+ 13087 "00001111" // /* MW 3 */
+ 13088 "10011100" // /* MW 2 */
+ 13089 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13090 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13091 "00001110" // /* MW 3 */
+ 13092 "10001001" // /* MW 2 */
+ 13093 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13094 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13095 "11100001" // /* MW 7 */
+ 13096 "10010010" // /* MW 6 */
+ 13097 "10001011" // /* MW 5 */
+ 13098 "01000110" // /* MW 4 */
+ 13099 "00000011" // /* MW 3 */
+ 13100 "00011100" // /* MW 2 */
+ 13101 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13102 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13103 "11100001" // /* MW 7 */
+ 13104 "01010110" // /* MW 6 */
+ 13105 "10001000" // /* MW 5 */
+ 13106 "01000110" // /* MW 4 */
+ 13107 "00000111" // /* MW 3 */
+ 13108 "00011100" // /* MW 2 */
+ 13109 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13110 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13111 "01101110" // /* MW 9 */
+ 13112 "01000001" // /* MW 8 */
+ 13113 "00011000" // /* MW 7 */
+ 13114 "00000001" // /* MW 6 */
+ 13115 "00010000" // /* MW 5 */
+ 13116 "00000000" // /* MW 4 */
+ 13117 "11110000" // /* MW 3 */
+ 13118 "00101100" // /* MW 2 */
+ 13119 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13120 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13121 "01101010" // /* MW 15 */
+ 13122 "01100011" // /* MW 14 */
+ 13123 "01111100" // /* MW 13 */
+ 13124 "10100101" // /* MW 12 */
+ 13125 "00000001" // /* MW 11 */
+ 13126 "00000000" // /* MW 10 */
+ 13127 "00000000" // /* MW 9 */
+ 13128 "00000000" // /* MW 8 */
+ 13129 "01011011" // /* MW 7 */
+ 13130 "00000001" // /* MW 6 */
+ 13131 "00100000" // /* MW 5 */
+ 13132 "00000000" // /* MW 4 */
+ 13133 "11110000" // /* MW 3 */
+ 13134 "00101100" // /* MW 2 */
+ 13135 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13136 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13137 "00011010" // /* MW 15 */
+ 13138 "01001000" // /* MW 14 */
+ 13139 "01111100" // /* MW 13 */
+ 13140 "10100101" // /* MW 12 */
+ 13141 "00000001" // /* MW 11 */
+ 13142 "00000000" // /* MW 10 */
+ 13143 "00000000" // /* MW 9 */
+ 13144 "00000000" // /* MW 8 */
+ 13145 "01011011" // /* MW 7 */
+ 13146 "00000001" // /* MW 6 */
+ 13147 "00100000" // /* MW 5 */
+ 13148 "00000000" // /* MW 4 */
+ 13149 "11110000" // /* MW 3 */
+ 13150 "00101100" // /* MW 2 */
+ 13151 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13152 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13153 "01101110" // /* MW 9 */
+ 13154 "10000001" // /* MW 8 */
+ 13155 "10000100" // /* MW 7 */
+ 13156 "00000010" // /* MW 6 */
+ 13157 "10010000" // /* MW 5 */
+ 13158 "01110011" // /* MW 4 */
+ 13159 "11110100" // /* MW 3 */
+ 13160 "00001100" // /* MW 2 */
+ 13161 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13162 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13163 "00000001" // /* MW 7 */
+ 13164 "10001001" // /* MW 6 */
+ 13165 "10001010" // /* MW 5 */
+ 13166 "01000110" // /* MW 4 */
+ 13167 "00001011" // /* MW 3 */
+ 13168 "10011100" // /* MW 2 */
+ 13169 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13170 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13171 "00000001" // /* MW 7 */
+ 13172 "00110101" // /* MW 6 */
+ 13173 "10001001" // /* MW 5 */
+ 13174 "11000110" // /* MW 4 */
+ 13175 "10000110" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13178 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13179 "00000110" // /* MW 3 */
+ 13180 "10001001" // /* MW 2 */
+ 13181 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13182 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13183 "10100001" // /* MW 7 */
+ 13184 "01001000" // /* MW 6 */
+ 13185 "10001100" // /* MW 5 */
+ 13186 "01000110" // /* MW 4 */
+ 13187 "00001111" // /* MW 3 */
+ 13188 "10011100" // /* MW 2 */
+ 13189 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13190 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13191 "10100001" // /* MW 9 */
+ 13192 "00110110" // /* MW 8 */
+ 13193 "10001010" // /* MW 7 */
+ 13194 "11000010" // /* MW 6 */
+ 13195 "10001110" // /* MW 5 */
+ 13196 "10110000" // /* MW 4 */
+ 13197 "11110100" // /* MW 3 */
+ 13198 "00101100" // /* MW 2 */
+ 13199 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13200 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13201 "00011101" // /* MW 5 */
+ 13202 "00010010" // /* MW 4 */
+ 13203 "10001011" // /* MW 3 */
+ 13204 "00011110" // /* MW 2 */
+ 13205 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13206 "01011010" // MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13207 "11100001" // /* MW 9 */
+ 13208 "10010010" // /* MW 8 */
+ 13209 "10001011" // /* MW 7 */
+ 13210 "00000010" // /* MW 6 */
+ 13211 "01000100" // /* MW 5 */
+ 13212 "10110111" // /* MW 4 */
+ 13213 "00000001" // /* MW 3 */
+ 13214 "00000000" // /* MW 2 */
+ 13215 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13216 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13217 "11100001" // /* MW 11 */
+ 13218 "01010110" // /* MW 10 */
+ 13219 "10001000" // /* MW 9 */
+ 13220 "00000010" // /* MW 8 */
+ 13221 "00111111" // /* MW 7 */
+ 13222 "10001111" // /* MW 6 */
+ 13223 "00000001" // /* MW 5 */
+ 13224 "00000000" // /* MW 4 */
+ 13225 "01110000" // /* MW 3 */
+ 13226 "10000101" // /* MW 2 */
+ 13227 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13228 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13229 "01111111" // /* MW 3 */
+ 13230 "01110010" // /* MW 2 */
+ 13231 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13232 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13233 "10011011" // /* MW 3 */
+ 13234 "00011101" // /* MW 2 */
+ 13235 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13236 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13237 "01110100" // /* MW 3 */
+ 13238 "00011100" // /* MW 2 */
+ 13239 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13240 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13241 "10110100" // /* MW 3 */
+ 13242 "01011000" // /* MW 2 */
+ 13243 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13244 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13245 "10010110" // /* MW 3 */
+ 13246 "00010001" // /* MW 2 */
+ 13247 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13248 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13249 "00010110" // /* MW 3 */
+ 13250 "00010000" // /* MW 2 */
+ 13251 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13252 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13253 "01101100" // /* MW 3 */
+ 13254 "01010000" // /* MW 2 */
+ 13255 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13256 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13257 "00010100" // /* MW 3 */
+ 13258 "01010011" // /* MW 2 */
+ 13259 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13260 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13261 "01110000" // /* MW 7 */
+ 13262 "00110110" // /* MW 6 */
+ 13263 "10101000" // /* MW 5 */
+ 13264 "00000010" // /* MW 4 */
+ 13265 "01100000" // /* MW 3 */
+ 13266 "01000010" // /* MW 2 */
+ 13267 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13268 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13269 "00000011" // /* MW 3 */
+ 13270 "00011100" // /* MW 2 */
+ 13271 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13272 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13273 "01110000" // /* MW 7 */
+ 13274 "01000101" // /* MW 6 */
+ 13275 "10000000" // /* MW 5 */
+ 13276 "00000001" // /* MW 4 */
+ 13277 "01100000" // /* MW 3 */
+ 13278 "01010010" // /* MW 2 */
+ 13279 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13280 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13281 "01000001" // /* MW 7 */
+ 13282 "01101101" // /* MW 6 */
+ 13283 "10001100" // /* MW 5 */
+ 13284 "01000110" // /* MW 4 */
+ 13285 "00000111" // /* MW 3 */
+ 13286 "00011100" // /* MW 2 */
+ 13287 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13288 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13289 "01000001" // /* MW 7 */
+ 13290 "00000011" // /* MW 6 */
+ 13291 "10001001" // /* MW 5 */
+ 13292 "11000110" // /* MW 4 */
+ 13293 "10000010" // /* MW 3 */
+ 13294 "00110000" // /* MW 2 */
+ 13295 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13367 "00000101" // /* MW 5 */
+ 13368 "01100001" // /* MW 4 */
+ 13369 "11110100" // /* MW 3 */
+ 13370 "00101100" // /* MW 2 */
+ 13371 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13372 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13373 "01000001" // /* MW 3 */
+ 13374 "01101101" // /* MW 2 */
+ 13375 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "00011010" // /* MW 15 */
+ 13378 "01001000" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13392 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13393 "01101000" // /* MW 11 */
+ 13394 "10000001" // /* MW 10 */
+ 13395 "10000100" // /* MW 9 */
+ 13396 "00000010" // /* MW 8 */
+ 13397 "00100111" // /* MW 7 */
+ 13398 "00000100" // /* MW 6 */
+ 13399 "00100000" // /* MW 5 */
+ 13400 "11100111" // /* MW 4 */
+ 13401 "11111000" // /* MW 3 */
+ 13402 "00001100" // /* MW 2 */
+ 13403 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13404 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13405 "00000001" // /* MW 7 */
+ 13406 "10001001" // /* MW 6 */
+ 13407 "10001010" // /* MW 5 */
+ 13408 "01000110" // /* MW 4 */
+ 13409 "00001011" // /* MW 3 */
+ 13410 "10011100" // /* MW 2 */
+ 13411 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13412 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13413 "00000001" // /* MW 7 */
+ 13414 "00110101" // /* MW 6 */
+ 13415 "10001001" // /* MW 5 */
+ 13416 "11000110" // /* MW 4 */
+ 13417 "10000110" // /* MW 3 */
+ 13418 "00110000" // /* MW 2 */
+ 13419 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13420 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13421 "00000110" // /* MW 3 */
+ 13422 "10001001" // /* MW 2 */
+ 13423 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13424 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13425 "10100001" // /* MW 7 */
+ 13426 "01001000" // /* MW 6 */
+ 13427 "10001100" // /* MW 5 */
+ 13428 "01000110" // /* MW 4 */
+ 13429 "00001111" // /* MW 3 */
+ 13430 "10011100" // /* MW 2 */
+ 13431 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13432 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13433 "10100001" // /* MW 7 */
+ 13434 "00110110" // /* MW 6 */
+ 13435 "10001010" // /* MW 5 */
+ 13436 "11000110" // /* MW 4 */
+ 13437 "10001110" // /* MW 3 */
+ 13438 "10110000" // /* MW 2 */
+ 13439 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13440 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13441 "00001110" // /* MW 3 */
+ 13442 "10001001" // /* MW 2 */
+ 13443 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13444 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13445 "11100001" // /* MW 3 */
+ 13446 "10010010" // /* MW 2 */
+ 13447 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13448 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13449 "11100001" // /* MW 3 */
+ 13450 "01010110" // /* MW 2 */
+ 13451 "10001000" // /* MW 1 */
+ 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13453 "00000000" // /* MW 1 */
+ 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13455 "00000000" // /* MW 1 */
+ 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13457 "00000000" // /* MW 1 */
+ 13458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13459 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13460 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13461 "10010110" // /* MW 3 */
+ 13462 "00010001" // /* MW 2 */
+ 13463 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13464 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13465 "00000000" // /* MW 5 */
+ 13466 "01010000" // /* MW 4 */
+ 13467 "11000000" // /* MW 3 */
+ 13468 "00000010" // /* MW 2 */
+ 13469 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13470 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13471 "01101100" // /* MW 3 */
+ 13472 "01010000" // /* MW 2 */
+ 13473 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13474 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13475 "00010100" // /* MW 3 */
+ 13476 "01010011" // /* MW 2 */
+ 13477 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13478 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13479 "01101100" // /* MW 3 */
+ 13480 "01010000" // /* MW 2 */
+ 13481 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13482 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13483 "00010011" // /* MW 3 */
+ 13484 "10001010" // /* MW 2 */
+ 13485 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13486 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13487 "10010011" // /* MW 3 */
+ 13488 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13489 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13504 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13505 "10000000" // /* MW 5 */
+ 13506 "11001000" // /* MW 4 */
+ 13507 "11001000" // /* MW 3 */
+ 13508 "00000111" // /* MW 2 */
+ 13509 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13510 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13511 "01000001" // /* MW 5 */
+ 13512 "00101111" // /* MW 4 */
+ 13513 "11010000" // /* MW 3 */
+ 13514 "11000010" // /* MW 2 */
+ 13515 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13516 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13517 "00000001" // /* MW 5 */
+ 13518 "00000000" // /* MW 4 */
+ 13519 "00000000" // /* MW 3 */
+ 13520 "00010000" // /* MW 2 */
+ 13521 "00000000" // /* MW 1 */
+ 13522 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13523 "01110000" // /* MW 7 */
+ 13524 "01110000" // /* MW 6 */
+ 13525 "00101101" // /* MW 5 */
+ 13526 "00000010" // /* MW 4 */
+ 13527 "10110000" // /* MW 3 */
+ 13528 "00111010" // /* MW 2 */
+ 13529 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13530 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13531 "01110000" // /* MW 7 */
+ 13532 "11110000" // /* MW 6 */
+ 13533 "10101000" // /* MW 5 */
+ 13534 "00000001" // /* MW 4 */
+ 13535 "10110000" // /* MW 3 */
+ 13536 "10110110" // /* MW 2 */
+ 13537 "11111111" // /* MW 1 */
+ 13538 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13539 "00011101" // /* MW 3 */
+ 13540 "11101100" // /* MW 2 */
+ 13541 "00001111" // /* MW 1 */
+ 13542 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13543 "10011101" // /* MW 3 */
+ 13544 "11110111" // /* MW 2 */
+ 13545 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13546 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13547 "01110000" // /* MW 7 */
+ 13548 "01100000" // /* MW 6 */
+ 13549 "11001010" // /* MW 5 */
+ 13550 "00000001" // /* MW 4 */
+ 13551 "10110000" // /* MW 3 */
+ 13552 "00000010" // /* MW 2 */
+ 13553 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13554 "10000100" // JNZ r16, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */
+ 13555 "00000001" // /* MW 5 */
+ 13556 "01000000" // /* MW 4 */
+ 13557 "10111000" // /* MW 3 */
+ 13558 "00011010" // /* MW 2 */
+ 13559 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13560 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13561 "11000000" // /* MW 3 */
+ 13562 "11010110" // /* MW 2 */
+ 13563 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13564 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13565 "10010000" // /* MW 3 */
+ 13566 "01100010" // /* MW 2 */
+ 13567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13568 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13569 "11111011" // /* MW 3 */
+ 13570 "01100011" // /* MW 2 */
+ 13571 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13572 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13573 "10100000" // /* MW 5 */
+ 13574 "11001000" // /* MW 4 */
+ 13575 "11000110" // /* MW 3 */
+ 13576 "00000111" // /* MW 2 */
+ 13577 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13578 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13579 "00110001" // /* MW 3 */
+ 13580 "00000110" // /* MW 2 */
+ 13581 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13582 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13583 "00010001" // /* MW 9 */
+ 13584 "00110100" // /* MW 8 */
+ 13585 "10110010" // /* MW 7 */
+ 13586 "11110000" // /* MW 6 */
+ 13587 "00000001" // /* MW 5 */
+ 13588 "00000000" // /* MW 4 */
+ 13589 "01100000" // /* MW 3 */
+ 13590 "10010001" // /* MW 2 */
+ 13591 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13592 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13593 "00010000" // /* MW 11 */
+ 13594 "00110010" // /* MW 10 */
+ 13595 "10110010" // /* MW 9 */
+ 13596 "11110000" // /* MW 8 */
+ 13597 "00000001" // /* MW 7 */
+ 13598 "00000000" // /* MW 6 */
+ 13599 "10001011" // /* MW 5 */
+ 13600 "10001000" // /* MW 4 */
+ 13601 "11100000" // /* MW 3 */
+ 13602 "11000000" // /* MW 2 */
+ 13603 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13606 "00000100" // JL #12096 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12096 delay_slots=5 */
+ 13607 "00000001" // /* MW 5 */
+ 13608 "00000000" // /* MW 4 */
+ 13609 "10100000" // /* MW 3 */
+ 13610 "00010111" // /* MW 2 */
+ 13611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13617 "00110001" // /* MW 3 */
+ 13618 "00100000" // /* MW 2 */
+ 13619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13621 "00000101" // /* MW 3 */
+ 13622 "00100000" // /* MW 2 */
+ 13623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13624 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13625 "01110000" // /* MW 7 */
+ 13626 "10100101" // /* MW 6 */
+ 13627 "00000001" // /* MW 5 */
+ 13628 "00000000" // /* MW 4 */
+ 13629 "00110000" // /* MW 3 */
+ 13630 "11000010" // /* MW 2 */
+ 13631 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13632 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13633 "00000000" // /* MW 7 */
+ 13634 "10000010" // /* MW 6 */
+ 13635 "00110011" // /* MW 5 */
+ 13636 "00000001" // /* MW 4 */
+ 13637 "01100000" // /* MW 3 */
+ 13638 "10010001" // /* MW 2 */
+ 13639 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13640 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13641 "00111010" // /* MW 3 */
+ 13642 "00000110" // /* MW 2 */
+ 13643 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13644 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13645 "00010000" // /* MW 9 */
+ 13646 "00110000" // /* MW 8 */
+ 13647 "00110010" // /* MW 7 */
+ 13648 "11110001" // /* MW 6 */
+ 13649 "00000001" // /* MW 5 */
+ 13650 "00000000" // /* MW 4 */
+ 13651 "01010000" // /* MW 3 */
+ 13652 "11000011" // /* MW 2 */
+ 13653 "01000100" // /* MW 1 */
+ 13654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13655 "00000000" // /* MW 1 */
+ 13656 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */
+ 13657 "00000000" // /* MW 5 */
+ 13658 "00000000" // /* MW 4 */
+ 13659 "11000000" // /* MW 3 */
+ 13660 "00011010" // /* MW 2 */
+ 13661 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13662 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13663 "10110000" // /* MW 5 */
+ 13664 "11001000" // /* MW 4 */
+ 13665 "11000110" // /* MW 3 */
+ 13666 "00000111" // /* MW 2 */
+ 13667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13672 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13673 "00110001" // /* MW 3 */
+ 13674 "00000110" // /* MW 2 */
+ 13675 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13676 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00010001" // /* MW 3 */
+ 13678 "00000110" // /* MW 2 */
+ 13679 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13680 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13681 "00000000" // /* MW 15 */
+ 13682 "00000000" // /* MW 14 */
+ 13683 "00010000" // /* MW 13 */
+ 13684 "00101100" // /* MW 12 */
+ 13685 "10110010" // /* MW 11 */
+ 13686 "11110001" // /* MW 10 */
+ 13687 "00000001" // /* MW 9 */
+ 13688 "00000000" // /* MW 8 */
+ 13689 "01011011" // /* MW 7 */
+ 13690 "00000001" // /* MW 6 */
+ 13691 "00100000" // /* MW 5 */
+ 13692 "00000000" // /* MW 4 */
+ 13693 "11110000" // /* MW 3 */
+ 13694 "00101100" // /* MW 2 */
+ 13695 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13696 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "10000110" // /* MW 3 */
+ 13698 "01100111" // /* MW 2 */
+ 13699 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13700 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13701 "00010000" // /* MW 9 */
+ 13702 "00101000" // /* MW 8 */
+ 13703 "00110010" // /* MW 7 */
+ 13704 "11110010" // /* MW 6 */
+ 13705 "00000001" // /* MW 5 */
+ 13706 "00000000" // /* MW 4 */
+ 13707 "11010000" // /* MW 3 */
+ 13708 "11101110" // /* MW 2 */
+ 13709 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13710 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13711 "00010110" // /* MW 3 */
+ 13712 "11111110" // /* MW 2 */
+ 13713 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13714 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13715 "00110110" // /* MW 3 */
+ 13716 "11111110" // /* MW 2 */
+ 13717 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13718 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13719 "01010110" // /* MW 3 */
+ 13720 "01000110" // /* MW 2 */
+ 13721 "00000010" // /* MW 1 */
+ 13722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13723 "00000000" // /* MW 1 */
+ 13724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13725 "00000000" // /* MW 1 */
+ 13726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13727 "00000000" // /* MW 1 */
+ 13728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13729 "00000000" // /* MW 1 */
+ 13730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13731 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13733 "00000010" // /* MW 3 */
+ 13734 "01100001" // /* MW 2 */
+ 13735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13736 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13737 "00010001" // /* MW 3 */
+ 13738 "00000110" // /* MW 2 */
+ 13739 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13740 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13741 "11111101" // /* MW 3 */
+ 13742 "11100000" // /* MW 2 */
+ 13743 "00010111" // /* MW 1 */
+ 13744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13745 "00000000" // /* MW 1 */
+ 13746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13747 "00000000" // /* MW 1 */
+ 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13749 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 13750 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13751 "00001000" // /* MW 3 */
+ 13752 "10010011" // /* MW 2 */
+ 13753 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 13754 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13755 "00010000" // /* MW 9 */
+ 13756 "00100000" // /* MW 8 */
+ 13757 "10110010" // /* MW 7 */
+ 13758 "11110011" // /* MW 6 */
+ 13759 "00000001" // /* MW 5 */
+ 13760 "00000000" // /* MW 4 */
+ 13761 "00000000" // /* MW 3 */
+ 13762 "00101111" // /* MW 2 */
+ 13763 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 13764 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13765 "11000001" // /* MW 5 */
+ 13766 "00101011" // /* MW 4 */
+ 13767 "00101000" // /* MW 3 */
+ 13768 "00000000" // /* MW 2 */
+ 13769 "00000110" // /* MW 1 */
+ 13770 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13771 "01011010" // /* MW 3 */
+ 13772 "01101000" // /* MW 2 */
+ 13773 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 13774 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13775 "10000001" // /* MW 5 */
+ 13776 "00101001" // /* MW 4 */
+ 13777 "00100111" // /* MW 3 */
+ 13778 "11010011" // /* MW 2 */
+ 13779 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 13780 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13781 "00110110" // /* MW 3 */
+ 13782 "00000110" // /* MW 2 */
+ 13783 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 13784 "10111010" // LDA r16, [p3]; MOVXM p3, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13785 "00010000" // /* MW 9 */
+ 13786 "11000000" // /* MW 8 */
+ 13787 "10110011" // /* MW 7 */
+ 13788 "11110001" // /* MW 6 */
+ 13789 "00000001" // /* MW 5 */
+ 13790 "00000000" // /* MW 4 */
+ 13791 "11010000" // /* MW 3 */
+ 13792 "11000010" // /* MW 2 */
+ 13793 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 13794 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "01010110" // /* MW 3 */
+ 13796 "00000110" // /* MW 2 */
+ 13797 "00000111" // /* MW 1 */
+ 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13799 "00000000" // /* MW 1 */
+ 13800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13801 "00000000" // /* MW 1 */
+ 13802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13803 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 13804 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13805 "01110110" // /* MW 3 */
+ 13806 "00000110" // /* MW 2 */
+ 13807 "00000101" // /* MW 1 */
+ 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13809 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 13810 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13811 "00001111" // /* MW 3 */
+ 13812 "01100001" // /* MW 2 */
+ 13813 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 13814 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13815 "00000111" // /* MW 3 */
+ 13816 "10100010" // /* MW 2 */
+ 13817 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 13818 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13819 "11111101" // /* MW 3 */
+ 13820 "00100000" // /* MW 2 */
+ 13821 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 13822 "00000100" // JL #12768 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12768 delay_slots=5 */
+ 13823 "00000001" // /* MW 5 */
+ 13824 "00000000" // /* MW 4 */
+ 13825 "11110000" // /* MW 3 */
+ 13826 "00011000" // /* MW 2 */
+ 13827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 13828 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13829 "00110001" // /* MW 3 */
+ 13830 "00000110" // /* MW 2 */
+ 13831 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 13832 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13833 "11000001" // /* MW 3 */
+ 13834 "01001001" // /* MW 2 */
+ 13835 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 13836 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13837 "00100101" // /* MW 3 */
+ 13838 "10110100" // /* MW 2 */
+ 13839 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 13840 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13841 "00010101" // /* MW 3 */
+ 13842 "10111011" // /* MW 2 */
+ 13843 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 13844 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13845 "11000001" // /* MW 11 */
+ 13846 "10001010" // /* MW 10 */
+ 13847 "11011111" // /* MW 9 */
+ 13848 "00000011" // /* MW 8 */
+ 13849 "00000000" // /* MW 7 */
+ 13850 "00000000" // /* MW 6 */
+ 13851 "00100000" // /* MW 5 */
+ 13852 "00000000" // /* MW 4 */
+ 13853 "11110000" // /* MW 3 */
+ 13854 "00101100" // /* MW 2 */
+ 13855 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 13856 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13857 "00001010" // /* MW 3 */
+ 13858 "01100111" // /* MW 2 */
+ 13859 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 13860 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13861 "00010110" // /* MW 3 */
+ 13862 "00000110" // /* MW 2 */
+ 13863 "00000010" // /* MW 1 */
+ 13864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13865 "00000000" // /* MW 1 */
+ 13866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13867 "00000000" // /* MW 1 */
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+ 13872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13873 "00000000" // /* MW 1 */
+ 13874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13875 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 13876 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "11111000" // /* MW 3 */
+ 13878 "00010000" // /* MW 2 */
+ 13879 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 13880 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13881 "00010000" // /* MW 9 */
+ 13882 "00110000" // /* MW 8 */
+ 13883 "10110010" // /* MW 7 */
+ 13884 "11110000" // /* MW 6 */
+ 13885 "00000001" // /* MW 5 */
+ 13886 "00000000" // /* MW 4 */
+ 13887 "11010000" // /* MW 3 */
+ 13888 "11000010" // /* MW 2 */
+ 13889 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 13890 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13891 "01010110" // /* MW 3 */
+ 13892 "00000110" // /* MW 2 */
+ 13893 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 13894 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13895 "00110110" // /* MW 3 */
+ 13896 "00000110" // /* MW 2 */
+ 13897 "00000111" // /* MW 1 */
+ 13898 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13899 "10011001" // /* MW 3 */
+ 13900 "11110100" // /* MW 2 */
+ 13901 "00000111" // /* MW 1 */
+ 13902 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13903 "11010001" // /* MW 3 */
+ 13904 "11111001" // /* MW 2 */
+ 13905 "00000111" // /* MW 1 */
+ 13906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13907 "00000000" // /* MW 1 */
+ 13908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13909 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 13910 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13911 "00000001" // /* MW 3 */
+ 13912 "11100001" // /* MW 2 */
+ 13913 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 13914 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13915 "00010001" // /* MW 3 */
+ 13916 "11100110" // /* MW 2 */
+ 13917 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 13918 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13919 "00101000" // /* MW 3 */
+ 13920 "01100001" // /* MW 2 */
+ 13921 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 13922 "10000100" // JNZ r16, #13952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13952 delay_slots=5 */
+ 13923 "00000001" // /* MW 5 */
+ 13924 "01000000" // /* MW 4 */
+ 13925 "01000000" // /* MW 3 */
+ 13926 "00011011" // /* MW 2 */
+ 13927 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 13928 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00000001" // /* MW 3 */
+ 13930 "00110000" // /* MW 2 */
+ 13931 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13939 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 13940 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13941 "11000001" // /* MW 11 */
+ 13942 "10001000" // /* MW 10 */
+ 13943 "10000011" // /* MW 9 */
+ 13944 "00000011" // /* MW 8 */
+ 13945 "00000000" // /* MW 7 */
+ 13946 "00000000" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 13952 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13953 "01000001" // /* MW 5 */
+ 13954 "11101101" // /* MW 4 */
+ 13955 "00101110" // /* MW 3 */
+ 13956 "10110110" // /* MW 2 */
+ 13957 "11111111" // /* MW 1 */
+ 13958 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13959 "11110001" // /* MW 3 */
+ 13960 "11110001" // /* MW 2 */
+ 13961 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 13962 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13963 "00000000" // /* MW 3 */
+ 13964 "00101000" // /* MW 2 */
+ 13965 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 13966 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13967 "00000001" // /* MW 5 */
+ 13968 "00000000" // /* MW 4 */
+ 13969 "00000000" // /* MW 3 */
+ 13970 "11110000" // /* MW 2 */
+ 13971 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13977 "00000000" // /* MW 1 */
+.delay_slot
+ 13978 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13979 "11000000" // /* MW 3 */
+ 13980 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 13981 "00011111" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 20 first
+.src_ref 0 "0_0_reloadable3.cc" 22 79
+.function_start
+ 13984 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13985 "11000000" // /* MW 3 */
+ 13986 "01100000" // /* MW 2 */
+ 13987 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 22 79 first
+ 13988 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00011110" // /* MW 3 */
+ 13990 "00011100" // /* MW 2 */
+ 13991 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 23 79 first
+ 13992 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "10011110" // /* MW 3 */
+ 13994 "00101100" // /* MW 2 */
+ 13995 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 25 81 first
+ 13996 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "10011110" // /* MW 3 */
+ 13998 "11110101" // /* MW 2 */
+ 13999 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 24 47 first
+ 14000 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14001 "00011110" // /* MW 3 */
+ 14002 "00000101" // /* MW 2 */
+ 14003 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 21 4 first
+.tail_call
+ 14004 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 14005 "00000000" // /* MW 5 */
+ 14006 "00000000" // /* MW 4 */
+ 14007 "01110000" // /* MW 3 */
+ 14008 "00001101" // /* MW 2 */
+ 14009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14015 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14017 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 14019 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 29 first
+.src_ref 0 "0_0_reloadable3.cc" 31 79
+.function_start
+ 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14033 "11000000" // /* MW 3 */
+ 14034 "01100000" // /* MW 2 */
+ 14035 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 31 79 first
+ 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00011110" // /* MW 3 */
+ 14038 "00101100" // /* MW 2 */
+ 14039 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 33 81 first
+ 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14041 "00011110" // /* MW 3 */
+ 14042 "11110101" // /* MW 2 */
+ 14043 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 32 47 first
+ 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14045 "10011110" // /* MW 3 */
+ 14046 "00000100" // /* MW 2 */
+ 14047 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 30 4 first
+.tail_call
+ 14048 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 14049 "00000000" // /* MW 5 */
+ 14050 "00000000" // /* MW 4 */
+ 14051 "00011000" // /* MW 3 */
+ 14052 "00010000" // /* MW 2 */
+ 14053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14061 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 14063 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 37 first
+.src_ref 0 "0_0_reloadable3.cc" 39 79
+.function_start
+ 14064 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14065 "11000000" // /* MW 3 */
+ 14066 "01100000" // /* MW 2 */
+ 14067 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 39 79 first
+ 14068 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14069 "00011110" // /* MW 3 */
+ 14070 "00101100" // /* MW 2 */
+ 14071 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 41 81 first
+ 14072 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14073 "00011110" // /* MW 3 */
+ 14074 "11110101" // /* MW 2 */
+ 14075 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 40 47 first
+ 14076 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14077 "10011110" // /* MW 3 */
+ 14078 "00000100" // /* MW 2 */
+ 14079 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 38 4 first
+.tail_call
+ 14080 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 14081 "00000000" // /* MW 5 */
+ 14082 "00000000" // /* MW 4 */
+ 14083 "11001000" // /* MW 3 */
+ 14084 "00010001" // /* MW 2 */
+ 14085 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14087 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14089 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 14095 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 45 first
+.src_ref 0 "0_0_reloadable3.cc" 47 79
+.function_start
+ 14096 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "11000000" // /* MW 3 */
+ 14098 "01100000" // /* MW 2 */
+ 14099 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 47 79 first
+ 14100 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14101 "00011110" // /* MW 3 */
+ 14102 "00101100" // /* MW 2 */
+ 14103 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 49 81 first
+ 14104 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14105 "00011110" // /* MW 3 */
+ 14106 "11110101" // /* MW 2 */
+ 14107 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 48 47 first
+ 14108 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14109 "10011110" // /* MW 3 */
+ 14110 "00000100" // /* MW 2 */
+ 14111 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 46 4 first
+.tail_call
+ 14112 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 14113 "00000000" // /* MW 5 */
+ 14114 "00000000" // /* MW 4 */
+ 14115 "10001000" // /* MW 3 */
+ 14116 "00010100" // /* MW 2 */
+ 14117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 14127 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 53 first
+.src_ref 0 "0_0_reloadable3.cc" 55 79
+.function_start
+ 14128 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14129 "11000000" // /* MW 3 */
+ 14130 "01100000" // /* MW 2 */
+ 14131 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 55 79 first
+ 14132 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "00011110" // /* MW 3 */
+ 14134 "00111100" // /* MW 2 */
+ 14135 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 56 47 first
+ 14136 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14137 "10011110" // /* MW 3 */
+ 14138 "11101100" // /* MW 2 */
+ 14139 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 58 81 first
+ 14140 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14141 "10011110" // /* MW 3 */
+ 14142 "00010101" // /* MW 2 */
+ 14143 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 57 80 first
+ 14144 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14145 "00011110" // /* MW 3 */
+ 14146 "00000101" // /* MW 2 */
+ 14147 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 54 4 first
+.tail_call
+ 14148 "10000100" // J #11488 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */
+ 14149 "00000000" // /* MW 5 */
+ 14150 "00000000" // /* MW 4 */
+ 14151 "01110000" // /* MW 3 */
+ 14152 "00010110" // /* MW 2 */
+ 14153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14155 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14161 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 14163 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable3.cc" 62 first
+.src_ref 0 "0_0_reloadable3.cc" 64 79
+.function_start
+ 14176 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14177 "11000000" // /* MW 3 */
+ 14178 "01100000" // /* MW 2 */
+ 14179 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 64 79 first
+ 14180 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14181 "00011110" // /* MW 3 */
+ 14182 "00011100" // /* MW 2 */
+ 14183 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 65 79 first
+ 14184 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "10011110" // /* MW 3 */
+ 14186 "00101100" // /* MW 2 */
+ 14187 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 67 81 first
+ 14188 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14189 "10011110" // /* MW 3 */
+ 14190 "11110101" // /* MW 2 */
+ 14191 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 66 47 first
+ 14192 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14193 "00011110" // /* MW 3 */
+ 14194 "00000101" // /* MW 2 */
+ 14195 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable3.cc" 63 4 first
+.tail_call
+ 14196 "10000100" // J #13504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13504 delay_slots=5 */
+ 14197 "00000000" // /* MW 5 */
+ 14198 "00000000" // /* MW 4 */
+ 14199 "01100000" // /* MW 3 */
+ 14200 "00011010" // /* MW 2 */
+ 14201 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14207 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 14211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 14224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14225 "01000001" // /* MW 5 */
+ 14226 "10100000" // /* MW 4 */
+ 14227 "00101111" // /* MW 3 */
+ 14228 "11000000" // /* MW 2 */
+ 14229 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14231 "00011100" // /* MW 3 */
+ 14232 "11000110" // /* MW 2 */
+ 14233 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "00011100" // /* MW 3 */
+ 14236 "11000110" // /* MW 2 */
+ 14237 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14239 "00011100" // /* MW 3 */
+ 14240 "11000110" // /* MW 2 */
+ 14241 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14243 "00011100" // /* MW 3 */
+ 14244 "11000110" // /* MW 2 */
+ 14245 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14247 "00011100" // /* MW 3 */
+ 14248 "11000110" // /* MW 2 */
+ 14249 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14251 "00011100" // /* MW 3 */
+ 14252 "11000110" // /* MW 2 */
+ 14253 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14255 "00011100" // /* MW 3 */
+ 14256 "11000110" // /* MW 2 */
+ 14257 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14259 "00011100" // /* MW 3 */
+ 14260 "11000110" // /* MW 2 */
+ 14261 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14263 "00011100" // /* MW 3 */
+ 14264 "11000110" // /* MW 2 */
+ 14265 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011100" // /* MW 3 */
+ 14268 "11000110" // /* MW 2 */
+ 14269 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011100" // /* MW 3 */
+ 14272 "11000110" // /* MW 2 */
+ 14273 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14275 "00011100" // /* MW 3 */
+ 14276 "11000110" // /* MW 2 */
+ 14277 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14279 "00011100" // /* MW 3 */
+ 14280 "11000110" // /* MW 2 */
+ 14281 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14283 "00011100" // /* MW 3 */
+ 14284 "11000110" // /* MW 2 */
+ 14285 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14287 "00011100" // /* MW 3 */
+ 14288 "11000110" // /* MW 2 */
+ 14289 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14291 "00011100" // /* MW 3 */
+ 14292 "11000110" // /* MW 2 */
+ 14293 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011100" // /* MW 3 */
+ 14296 "11000110" // /* MW 2 */
+ 14297 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "00011100" // /* MW 3 */
+ 14300 "11000110" // /* MW 2 */
+ 14301 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "00011100" // /* MW 3 */
+ 14304 "11000110" // /* MW 2 */
+ 14305 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14307 "00011100" // /* MW 3 */
+ 14308 "11000110" // /* MW 2 */
+ 14309 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14311 "00011100" // /* MW 3 */
+ 14312 "11000110" // /* MW 2 */
+ 14313 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14315 "00011100" // /* MW 3 */
+ 14316 "11000110" // /* MW 2 */
+ 14317 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14319 "00011100" // /* MW 3 */
+ 14320 "11000110" // /* MW 2 */
+ 14321 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14323 "00011100" // /* MW 3 */
+ 14324 "11000110" // /* MW 2 */
+ 14325 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14327 "00011100" // /* MW 3 */
+ 14328 "11000110" // /* MW 2 */
+ 14329 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14331 "00011100" // /* MW 3 */
+ 14332 "11000110" // /* MW 2 */
+ 14333 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14335 "00011100" // /* MW 3 */
+ 14336 "11000110" // /* MW 2 */
+ 14337 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 14338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14339 "00011100" // /* MW 3 */
+ 14340 "11000110" // /* MW 2 */
+ 14341 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 14342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14343 "00000000" // /* MW 3 */
+ 14344 "00101000" // /* MW 2 */
+ 14345 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 14346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14347 "00011100" // /* MW 3 */
+ 14348 "11000110" // /* MW 2 */
+ 14349 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14351 "00011100" // /* MW 3 */
+ 14352 "11000110" // /* MW 2 */
+ 14353 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14355 "00011100" // /* MW 3 */
+ 14356 "11000110" // /* MW 2 */
+ 14357 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 14358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00011100" // /* MW 3 */
+ 14360 "11000110" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.delay_slot
+ 14362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14363 "10100000" // /* MW 3 */
+ 14364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 14365 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f228402b070aee7a7253fb79fa4b7ba5dbc4d5a3
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/Release/3_3_reloadable13.txt
@@ -0,0 +1,4968 @@
+Contents of the .debug_line section:
+
+sigmoid_carf_templated_lut.h:
+File name Line number Starting address View Stmt
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 205 0x2580 x
+elementwise_binary_shared.h 211 0x2580 1 x
+elementwise_binary_shared.h 216 0x2580 2
+elementwise_binary_shared.h 216 0x2580 3
+elementwise_binary_shared.h 216 0x258a
+elementwise_binary_shared.h 211 0x2598 x
+elementwise_binary_shared.h 212 0x259c x
+elementwise_binary_shared.h 212 0x25ac
+elementwise_binary_shared.h 213 0x25b0 x
+elementwise_binary_shared.h 213 0x25c0
+elementwise_binary_shared.h 214 0x25c4 x
+elementwise_binary_shared.h 214 0x25d4
+elementwise_binary_shared.h 216 0x25d8 x
+elementwise_binary_shared.h 217 0x25dc x
+elementwise_binary_shared.h 216 0x25e0
+elementwise_binary_shared.h 216 0x25e6 x
+elementwise_binary_shared.h 216 0x25ea
+elementwise_binary_shared.h 216 0x25ee
+elementwise_binary_shared.h 107 0x2650 x
+elementwise_binary_shared.h 119 0x2650 1
+elementwise_binary_shared.h 126 0x2650 2
+elementwise_binary_shared.h 131 0x2650 3
+elementwise_binary_shared.h 119 0x2654 x
+elementwise_binary_shared.h 122 0x2658 x
+elementwise_binary_shared.h 124 0x265c x
+elementwise_binary_shared.h 124 0x2668
+elementwise_binary_shared.h 107 0x266c
+elementwise_binary_shared.h 124 0x2672
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2676
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 124 0x2676 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 150 0x268c
+elementwise_binary_shared.h 119 0x2692 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2696 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x2696 1
+elementwise_binary_shared.h 126 0x2696 2
+elementwise_binary_shared.h 131 0x2696 3
+elementwise_binary_shared.h 131 0x2696 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26a0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x26a0 1 x
+elementwise_binary_shared.h 131 0x26a0 2 x
+elementwise_binary_shared.h 171 0x26a0 3
+elementwise_binary_shared.h 131 0x26b2
+elementwise_binary_shared.h 131 0x26b2 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26b8 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x26b8 2
+elementwise_binary_shared.h 166 0x26bc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26c8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26c8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x26da x
+vector.hpp 1139 0x26e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26e4
+vector.hpp 1159 0x26e4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x26e4 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26f6
+vector.hpp 1139 0x26f6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26f6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26f6 3
+elementwise_binary_shared.h 173 0x26f6 4
+elementwise_binary_shared.h 150 0x2710
+elementwise_binary_shared.h 150 0x2714 x
+elementwise_binary_shared.h 150 0x2718
+elementwise_binary_shared.h 150 0x271e
+elementwise_binary_shared.h 150 0x2724
+elementwise_binary_shared.h 166 0x2724 1
+elementwise_binary_shared.h 150 0x2730
+elementwise_binary_shared.h 150 0x2740
+elementwise_binary_shared.h 150 0x2740 1
+elementwise_binary_shared.h 150 0x2740 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x274a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x274a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x274e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2752
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x2752 1
+elementwise_binary_shared.h 150 0x2758
+elementwise_binary_shared.h 150 0x275c
+elementwise_binary_shared.h 150 0x275c 1
+elementwise_binary_shared.h 150 0x2762
+elementwise_binary_shared.h 150 0x2766
+elementwise_binary_shared.h 150 0x276c
+elementwise_binary_shared.h 150 0x2774
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2784 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x278a x
+vector.hpp 1139 0x2790 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x2790 1 x
+elementwise_binary_shared.h 166 0x2790 2 x
+elementwise_binary_shared.h 169 0x2790 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x279c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x279c 1
+elementwise_binary_shared.h 166 0x279c 2
+elementwise_binary_shared.h 171 0x279c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27a8 x
+vector.hpp 1139 0x27a8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27a8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27a8 3 x
+elementwise_binary_shared.h 173 0x27a8 4 x
+elementwise_binary_shared.h 177 0x27a8 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x27b0 1 x
+elementwise_binary_shared.h 171 0x27b0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27b8 2 x
+elementwise_binary_shared.h 166 0x27be x
+elementwise_binary_shared.h 166 0x27c2
+elementwise_binary_shared.h 177 0x27c2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27ca x
+vector.hpp 1139 0x27ca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27ca 2 x
+elementwise_binary_shared.h 171 0x27ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27d0
+vector.hpp 1159 0x27d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27d0 2 x
+accum.hpp 1110 0x27d0 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27d0 4 x
+elementwise_binary_shared.h 185 0x27d0 5
+elementwise_binary_shared.h 177 0x27f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2800 x
+vector.hpp 1139 0x2800 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x2800 2 x
+elementwise_binary_shared.h 171 0x2800 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2810
+vector.hpp 1159 0x2810 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x2810 2 x
+accum.hpp 1110 0x2810 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x2810 4 x
+elementwise_binary_shared.h 185 0x2810 5 x
+elementwise_binary_shared.h 177 0x2830 x
+elementwise_binary_shared.h 187 0x2840 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2846 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2846 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2846 2 x
+elementwise_binary_shared.h 177 0x284c x
+elementwise_binary_shared.h 187 0x2852 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2856 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2856 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2856 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2860
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2860 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2860 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x31e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 199 0x31e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x31e4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x31e4 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x31ea
+io_buffer_main.h 125 0x31ea 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x31f0 x
+conv2d_dw_bf16.h 221 0x31f4 x
+conv2d_dw_bf16.h 221 0x31f8
+conv2d_dw_bf16.h 221 0x31fc
+conv2d_dw_bf16.h 221 0x3200
+conv2d_dw_bf16.h 221 0x3204
+conv2d_dw_bf16.h 222 0x3208 x
+conv2d_dw_bf16.h 222 0x320c
+conv2d_dw_bf16.h 222 0x3210
+conv2d_dw_bf16.h 222 0x3214
+conv2d_dw_bf16.h 222 0x3218
+conv2d_dw_bf16.h 223 0x321c x
+conv2d_dw_bf16.h 223 0x3220
+conv2d_dw_bf16.h 223 0x3224
+conv2d_dw_bf16.h 223 0x3228
+conv2d_dw_bf16.h 223 0x322c
+conv2d_dw_bf16.h 224 0x3230 x
+conv2d_dw_bf16.h 224 0x3234
+conv2d_dw_bf16.h 224 0x3238
+conv2d_dw_bf16.h 244 0x3238 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3242
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3242 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x3242 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3242 3 x
+conv2d_dw_bf16.h 225 0x3248
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x324c
+aie_core.h 81 0x324c 1
+aie_core.h 100 0x324c 2
+aie_core.h 100 0x324c 3
+aie_core.h 100 0x324c 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x324c 5
+vector.hpp 1139 0x324c 6
+vector.hpp 1139 0x324c 7 x
+vector.hpp 1139 0x324c 8 x
+vector.hpp 1159 0x324c 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x324c 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x324c 11
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3258
+aie_core.h 81 0x3258 1
+aie_core.h 100 0x3258 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3258 3
+vector.hpp 1139 0x3258 4
+vector.hpp 1159 0x3258 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3258 6 x
+conv2d_dw_bf16.h 225 0x3258 7 x
+conv2d_dw_bf16.h 244 0x3258 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3266
+aie_core.h 100 0x3266 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3266 2
+vector.hpp 1159 0x3266 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3266 4
+conv2d_dw_bf16.h 225 0x3266 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3270
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3270 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3270 2
+conv2d_dw_bf16.h 225 0x3270 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x327a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x327a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x327a 2
+conv2d_dw_bf16.h 244 0x327a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3284
+shuffle.hpp 142 0x3284 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3284 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x328a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x328a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x328a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3296
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3296 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3296 2 x
+conv2d_dw_bf16.h 250 0x3296 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x32a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x32a2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x32a2 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x32a8
+conv2d_dw_bf16.h 244 0x32ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x32b6
+shuffle.hpp 142 0x32b6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x32b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x32c0
+shuffle.hpp 142 0x32c0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x32c0 2
+conv2d_dw_bf16.h 271 0x32c0 3
+conv2d_dw_bf16.h 272 0x32c0 4
+conv2d_dw_bf16.h 273 0x32c0 5
+conv2d_dw_bf16.h 274 0x32c0 6
+conv2d_dw_bf16.h 275 0x32c0 7
+conv2d_dw_bf16.h 276 0x32c0 8
+conv2d_dw_bf16.h 277 0x32c0 9
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x32d0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x32d0 1
+accum.hpp 1110 0x32d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 265 0x32d0 3 x
+conv2d_dw_bf16.h 270 0x32d0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x32e0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x32e0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x32e0 3 x
+conv2d_dw_bf16.h 274 0x32e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x32f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x32f0 1 x
+vector.hpp 1139 0x32f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x32f0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x32fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x32fa 1 x
+conv2d_dw_bf16.h 271 0x32fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3304 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3304 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3304 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3304 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x330e
+shuffle.hpp 142 0x3312
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3312 1 x
+conv2d_dw_bf16.h 267 0x331a x
+conv2d_dw_bf16.h 276 0x331a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3322 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3326 x
+conv2d_dw_bf16.h 273 0x3326 1 x
+conv2d_dw_bf16.h 265 0x332e x
+conv2d_dw_bf16.h 277 0x332e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3336 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x3340 x
+conv2d_dw_bf16.h 274 0x3350 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3360 x
+aie_core.h 100 0x3360 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3360 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x336a x
+conv2d_dw_bf16.h 271 0x336a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3372 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3372 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x337a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x337e x
+conv2d_dw_bf16.h 272 0x337e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3386 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3386 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3390 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3390 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3390 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x3396 x
+conv2d_dw_bf16.h 273 0x3396 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x33a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x33a0 2
+conv2d_dw_bf16.h 277 0x33a0 3 x
+conv2d_dw_bf16.h 250 0x33ac x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33b0 x
+vector.hpp 1139 0x33b4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33b8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x33bc x
+accum.hpp 1110 0x33c0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x33c4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x33c8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x33cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x33cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x33cc 2 x
+conv2d_dw_bf16.h 268 0x33d4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x33d8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x33d8 2
+conv2d_dw_bf16.h 265 0x33e0 x
+conv2d_dw_bf16.h 270 0x33e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33e8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 274 0x33e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x33f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33f0 1 x
+vector.hpp 1139 0x33f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33f0 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x33fa 1 x
+conv2d_dw_bf16.h 271 0x33fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3404 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3404 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3404 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3404 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x340e
+shuffle.hpp 142 0x3412
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3412 1 x
+conv2d_dw_bf16.h 267 0x341a x
+conv2d_dw_bf16.h 276 0x341a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3422 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3426 x
+conv2d_dw_bf16.h 273 0x3426 1 x
+conv2d_dw_bf16.h 265 0x342e x
+conv2d_dw_bf16.h 277 0x342e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3436 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x343c x
+conv2d_dw_bf16.h 274 0x3440 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3450 x
+aie_core.h 100 0x3450 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3450 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3450 3 x
+conv2d_dw_bf16.h 266 0x345c x
+conv2d_dw_bf16.h 271 0x345c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3464 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3464 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x346c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x3470 x
+conv2d_dw_bf16.h 272 0x3470 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3478 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3478 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3480
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 273 0x3484 x
+conv2d_dw_bf16.h 277 0x3488 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x3494 x
+accum.hpp 1110 0x3498
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 290 0x3498 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x349e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x34a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x34a6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34aa x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x34aa 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x34ae x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34ae 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 432 0xac0 x
+conv2d_bf16_params.h 438 0xac0 1 x
+conv2d_bf16_params.h 452 0xac0 2
+conv2d_bf16_params.h 453 0xac0 3
+conv2d_bf16_params.h 458 0xac0 4
+conv2d_bf16_params.h 470 0xac0 5
+conv2d_bf16_params.h 438 0xaca
+conv2d_bf16_params.h 438 0xaca 1 x
+conv2d_bf16_params.h 452 0xaca 2
+conv2d_bf16_params.h 462 0xaca 3
+conv2d_bf16_params.h 432 0xad4
+conv2d_bf16_params.h 444 0xad4 1
+conv2d_bf16_params.h 453 0xade
+conv2d_bf16_params.h 458 0xade 1
+conv2d_bf16_params.h 458 0xade 2
+conv2d_bf16_params.h 444 0xaea
+conv2d_bf16_params.h 470 0xaea 1
+conv2d_bf16_params.h 477 0xaea 2
+conv2d_bf16_params.h 557 0xaea 3
+conv2d_bf16_params.h 452 0xaf6
+conv2d_bf16_params.h 458 0xaf6 1
+conv2d_bf16_params.h 462 0xaf6 2
+conv2d_bf16_params.h 438 0xafe
+conv2d_bf16_params.h 438 0xb02
+conv2d_bf16_params.h 438 0xb06
+conv2d_bf16_params.h 438 0xb0a
+conv2d_bf16_params.h 438 0xb18
+conv2d_bf16_params.h 438 0xb1c
+conv2d_bf16_params.h 438 0xb20
+conv2d_bf16_params.h 438 0xb24
+conv2d_bf16_params.h 438 0xb32
+conv2d_bf16_params.h 438 0xb36
+conv2d_bf16_params.h 438 0xb3a
+conv2d_bf16_params.h 438 0xb3e
+conv2d_bf16_params.h 438 0xb4c
+conv2d_bf16_params.h 438 0xb50
+conv2d_bf16_params.h 444 0xb54 x
+conv2d_bf16_params.h 447 0xb58 x
+conv2d_bf16_params.h 448 0xb5c x
+conv2d_bf16_params.h 452 0xb60 x
+conv2d_bf16_params.h 453 0xb64 x
+conv2d_bf16_params.h 458 0xb68 x
+conv2d_bf16_params.h 444 0xb6e x
+conv2d_bf16_params.h 458 0xb72 x
+conv2d_bf16_params.h 462 0xb72 1 x
+conv2d_bf16_params.h 462 0xb78
+conv2d_bf16_params.h 452 0xb7c x
+conv2d_bf16_params.h 452 0xb80
+conv2d_bf16_params.h 462 0xb80 1 x
+conv2d_bf16_params.h 557 0xb80 2
+conv2d_bf16_params.h 462 0xb86
+conv2d_bf16_params.h 458 0xb8a x
+conv2d_bf16_params.h 458 0xb8e
+conv2d_bf16_params.h 458 0xb92
+conv2d_bf16_params.h 477 0xb92 1
+conv2d_bf16_params.h 557 0xb92 2 x
+conv2d_bf16_params.h 458 0xb98 x
+conv2d_bf16_params.h 458 0xb9e
+conv2d_bf16_params.h 477 0xb9e 1 x
+conv2d_bf16_params.h 458 0xba4 x
+conv2d_bf16_params.h 444 0xba8 x
+conv2d_bf16_params.h 462 0xbac x
+conv2d_bf16_params.h 470 0xbb0 x
+conv2d_bf16_params.h 470 0xbb4
+conv2d_bf16_params.h 477 0xbb4 1 x
+conv2d_bf16_params.h 477 0xbb8
+conv2d_bf16_params.h 491 0xbc8
+conv2d_bf16_params.h 492 0xbc8 1
+conv2d_bf16_params.h 495 0xbc8 2
+conv2d_bf16_params.h 502 0xbc8 3
+conv2d_bf16_params.h 533 0xbc8 4
+conv2d_bf16_params.h 539 0xbc8 5
+conv2d_bf16_params.h 557 0xbc8 6
+conv2d_bf16_params.h 621 0xbc8 7
+conv2d_bf16_params.h 645 0xbc8 8
+conv2d_bf16_params.h 709 0xbc8 9
+conv2d_bf16_params.h 477 0xbd2
+conv2d_bf16_params.h 481 0xbd2 1
+conv2d_bf16_params.h 500 0xbd2 2
+conv2d_bf16_params.h 506 0xbd2 3
+conv2d_bf16_params.h 507 0xbd2 4
+conv2d_bf16_params.h 524 0xbd2 5
+conv2d_bf16_params.h 539 0xbd2 6
+conv2d_bf16_params.h 655 0xbd2 7
+conv2d_bf16_params.h 477 0xbdc
+conv2d_bf16_params.h 504 0xbdc 1
+conv2d_bf16_params.h 510 0xbdc 2
+conv2d_bf16_params.h 520 0xbdc 3
+conv2d_bf16_params.h 700 0xbdc 4
+conv2d_bf16_params.h 477 0xbe2
+conv2d_bf16_params.h 539 0xbe2 1
+conv2d_bf16_params.h 578 0xbe2 2
+conv2d_bf16_params.h 642 0xbe2 3
+conv2d_bf16_params.h 529 0xbe6
+conv2d_bf16_params.h 642 0xbe6 1
+conv2d_bf16_params.h 642 0xbe6 2
+conv2d_bf16_params.h 655 0xbea
+conv2d_bf16_params.h 453 0xbf0
+conv2d_bf16_params.h 453 0xbf0 1
+conv2d_bf16_params.h 477 0xbf0 2
+conv2d_bf16_params.h 504 0xbf0 3
+conv2d_bf16_params.h 655 0xbf0 4
+conv2d_bf16_params.h 453 0xbfc x
+conv2d_bf16_params.h 477 0xbfc 1
+conv2d_bf16_params.h 481 0xbfc 2
+conv2d_bf16_params.h 500 0xbfc 3
+conv2d_bf16_params.h 506 0xbfc 4
+conv2d_bf16_params.h 507 0xbfc 5
+conv2d_bf16_params.h 524 0xbfc 6
+conv2d_bf16_params.h 539 0xbfc 7
+conv2d_bf16_params.h 491 0xc06
+conv2d_bf16_params.h 492 0xc06 1
+conv2d_bf16_params.h 495 0xc06 2
+conv2d_bf16_params.h 502 0xc06 3
+conv2d_bf16_params.h 510 0xc06 4
+conv2d_bf16_params.h 520 0xc06 5
+conv2d_bf16_params.h 533 0xc06 6
+conv2d_bf16_params.h 539 0xc06 7
+conv2d_bf16_params.h 557 0xc06 8
+conv2d_bf16_params.h 621 0xc06 9
+conv2d_bf16_params.h 645 0xc06 10
+conv2d_bf16_params.h 655 0xc06 11
+conv2d_bf16_params.h 700 0xc06 12
+conv2d_bf16_params.h 709 0xc06 13
+conv2d_bf16_params.h 477 0xc10
+conv2d_bf16_params.h 529 0xc10 1
+conv2d_bf16_params.h 539 0xc10 2
+conv2d_bf16_params.h 578 0xc10 3
+conv2d_bf16_params.h 642 0xc10 4
+conv2d_bf16_params.h 642 0xc10 5
+conv2d_bf16_params.h 642 0xc10 6
+conv2d_bf16_params.h 477 0xc20 x
+conv2d_bf16_params.h 495 0xc20 1 x
+conv2d_bf16_params.h 495 0xc20 2
+conv2d_bf16_params.h 682 0xc20 3
+conv2d_bf16_params.h 477 0xc2a
+conv2d_bf16_params.h 481 0xc2a 1 x
+conv2d_bf16_params.h 495 0xc2a 2
+conv2d_bf16_params.h 495 0xc2a 3
+conv2d_bf16_params.h 477 0xc34 x
+conv2d_bf16_params.h 496 0xc34 1
+conv2d_bf16_params.h 504 0xc34 2
+conv2d_bf16_params.h 539 0xc34 3
+conv2d_bf16_params.h 578 0xc34 4
+conv2d_bf16_params.h 496 0xc3e
+conv2d_bf16_params.h 499 0xc3e 1
+conv2d_bf16_params.h 504 0xc3e 2 x
+conv2d_bf16_params.h 509 0xc3e 3
+conv2d_bf16_params.h 519 0xc3e 4
+conv2d_bf16_params.h 700 0xc3e 5
+conv2d_bf16_params.h 492 0xc48 x
+conv2d_bf16_params.h 497 0xc48 1
+conv2d_bf16_params.h 509 0xc48 2
+conv2d_bf16_params.h 500 0xc52
+conv2d_bf16_params.h 520 0xc52 1 x
+conv2d_bf16_params.h 502 0xc58
+conv2d_bf16_params.h 520 0xc58 1
+conv2d_bf16_params.h 502 0xc62
+conv2d_bf16_params.h 507 0xc62 1 x
+conv2d_bf16_params.h 495 0xc68 x
+conv2d_bf16_params.h 495 0xc6c
+conv2d_bf16_params.h 495 0xc6c 1
+conv2d_bf16_params.h 610 0xc6c 2
+conv2d_bf16_params.h 709 0xc6c 3
+conv2d_bf16_params.h 507 0xc72 x
+conv2d_bf16_params.h 495 0xc76 x
+conv2d_bf16_params.h 495 0xc7a
+conv2d_bf16_params.h 506 0xc7a 1
+conv2d_bf16_params.h 519 0xc7a 2 x
+conv2d_bf16_params.h 496 0xc84 x
+conv2d_bf16_params.h 504 0xc84 1 x
+conv2d_bf16_params.h 522 0xc84 2
+conv2d_bf16_params.h 509 0xc8e x
+conv2d_bf16_params.h 496 0xc94 x
+conv2d_bf16_params.h 520 0xc94 1 x
+conv2d_bf16_params.h 529 0xc94 2
+conv2d_bf16_params.h 497 0xc9e x
+conv2d_bf16_params.h 509 0xc9e 1 x
+conv2d_bf16_params.h 533 0xc9e 2
+conv2d_bf16_params.h 539 0xca8 x
+conv2d_bf16_params.h 499 0xcac x
+conv2d_bf16_params.h 499 0xcb0
+conv2d_bf16_params.h 529 0xcb4 x
+conv2d_bf16_params.h 507 0xcb8 x
+conv2d_bf16_params.h 511 0xcb8 1
+conv2d_bf16_params.h 491 0xcbe x
+conv2d_bf16_params.h 507 0xcbe 1
+conv2d_bf16_params.h 500 0xcc8 x
+conv2d_bf16_params.h 511 0xcc8 1 x
+conv2d_bf16_params.h 500 0xcce
+conv2d_bf16_params.h 534 0xcce 1
+conv2d_bf16_params.h 502 0xcd6 x
+conv2d_bf16_params.h 509 0xcd6 1 x
+conv2d_bf16_params.h 642 0xcd6 2
+conv2d_bf16_params.h 510 0xce2 x
+conv2d_bf16_params.h 506 0xce6 x
+conv2d_bf16_params.h 527 0xcea x
+conv2d_bf16_params.h 502 0xcf4 x
+conv2d_bf16_params.h 502 0xcf8
+conv2d_bf16_params.h 506 0xcfc x
+conv2d_bf16_params.h 506 0xd0c
+conv2d_bf16_params.h 506 0xd10
+conv2d_bf16_params.h 510 0xd14 x
+conv2d_bf16_params.h 510 0xd18
+conv2d_bf16_params.h 510 0xd1e
+conv2d_bf16_params.h 510 0xd22
+conv2d_bf16_params.h 510 0xd28
+conv2d_bf16_params.h 539 0xd28 1
+conv2d_bf16_params.h 642 0xd28 2
+conv2d_bf16_params.h 511 0xd2e x
+conv2d_bf16_params.h 524 0xd2e 1
+conv2d_bf16_params.h 539 0xd2e 2
+conv2d_bf16_params.h 512 0xd34 x
+conv2d_bf16_params.h 524 0xd34 1 x
+conv2d_bf16_params.h 524 0xd3a
+conv2d_bf16_params.h 524 0xd3e
+conv2d_bf16_params.h 520 0xd42 x
+conv2d_bf16_params.h 511 0xd46 x
+conv2d_bf16_params.h 522 0xd46 1 x
+conv2d_bf16_params.h 524 0xd4c x
+conv2d_bf16_params.h 529 0xd4c 1 x
+conv2d_bf16_params.h 539 0xd4c 2 x
+conv2d_bf16_params.h 534 0xd56
+conv2d_bf16_params.h 539 0xd56 1
+conv2d_bf16_params.h 527 0xd5c x
+conv2d_bf16_params.h 533 0xd5c 1 x
+conv2d_bf16_params.h 529 0xd6a x
+conv2d_bf16_params.h 533 0xd6a 1
+conv2d_bf16_params.h 539 0xd70 x
+conv2d_bf16_params.h 529 0xd76 x
+conv2d_bf16_params.h 529 0xd76 1
+conv2d_bf16_params.h 529 0xd7c
+conv2d_bf16_params.h 534 0xd80 x
+conv2d_bf16_params.h 534 0xd84
+conv2d_bf16_params.h 539 0xd84 1 x
+conv2d_bf16_params.h 555 0xd84 2
+conv2d_bf16_params.h 559 0xd84 3
+conv2d_bf16_params.h 700 0xd84 4
+conv2d_bf16_params.h 669 0xd8e
+conv2d_bf16_params.h 700 0xd8e 1
+conv2d_bf16_params.h 539 0xd92
+conv2d_bf16_params.h 539 0xda2
+conv2d_bf16_params.h 539 0xdb2
+conv2d_bf16_params.h 539 0xdb2 1
+conv2d_bf16_params.h 539 0xdb2 2
+conv2d_bf16_params.h 539 0xdb2 3
+conv2d_bf16_params.h 539 0xdbc
+conv2d_bf16_params.h 539 0xdc0
+conv2d_bf16_params.h 539 0xdc4
+conv2d_bf16_params.h 539 0xdc4 1
+conv2d_bf16_params.h 539 0xdca
+conv2d_bf16_params.h 539 0xdce
+conv2d_bf16_params.h 539 0xdd2
+conv2d_bf16_params.h 669 0xdd2 1
+conv2d_bf16_params.h 539 0xdd8
+conv2d_bf16_params.h 539 0xddc
+conv2d_bf16_params.h 539 0xde0
+conv2d_bf16_params.h 539 0xde4
+conv2d_bf16_params.h 555 0xde8 x
+conv2d_bf16_params.h 642 0xdf0
+conv2d_bf16_params.h 669 0xdf0 1
+conv2d_bf16_params.h 669 0xdf0 2
+conv2d_bf16_params.h 669 0xdfa x
+conv2d_bf16_params.h 497 0xdfe x
+conv2d_bf16_params.h 641 0xdfe 1 x
+conv2d_bf16_params.h 645 0xdfe 2
+conv2d_bf16_params.h 559 0xe08 x
+conv2d_bf16_params.h 640 0xe08 1
+conv2d_bf16_params.h 642 0xe08 2
+conv2d_bf16_params.h 642 0xe08 3
+conv2d_bf16_params.h 642 0xe12 x
+conv2d_bf16_params.h 578 0xe16 x
+conv2d_bf16_params.h 640 0xe1a x
+conv2d_bf16_params.h 557 0xe1e
+conv2d_bf16_params.h 645 0xe1e 1
+conv2d_bf16_params.h 641 0xe28 x
+conv2d_bf16_params.h 642 0xe28 1 x
+conv2d_bf16_params.h 642 0xe2e
+conv2d_bf16_params.h 642 0xe2e 1
+conv2d_bf16_params.h 558 0xe32 x
+conv2d_bf16_params.h 645 0xe32 1
+conv2d_bf16_params.h 540 0xe38
+conv2d_bf16_params.h 645 0xe38 1 x
+conv2d_bf16_params.h 540 0xe3e x
+conv2d_bf16_params.h 557 0xe3e 1
+conv2d_bf16_params.h 642 0xe44 x
+conv2d_bf16_params.h 557 0xe48 x
+conv2d_bf16_params.h 655 0xe48 1
+conv2d_bf16_params.h 558 0xe4e
+conv2d_bf16_params.h 655 0xe4e 1 x
+conv2d_bf16_params.h 558 0xe54 x
+conv2d_bf16_params.h 540 0xe58 x
+conv2d_bf16_params.h 655 0xe58 1
+conv2d_bf16_params.h 655 0xe58 2
+conv2d_bf16_params.h 679 0xe58 3
+conv2d_bf16_params.h 655 0xe62 x
+conv2d_bf16_params.h 558 0xe66 x
+conv2d_bf16_params.h 655 0xe66 1
+conv2d_bf16_params.h 655 0xe66 2
+conv2d_bf16_params.h 679 0xe66 3
+conv2d_bf16_params.h 655 0xe70 x
+conv2d_bf16_params.h 126 0xe74 x
+conv2d_bf16_params.h 559 0xe74 1 x
+conv2d_bf16_params.h 669 0xe7a x
+conv2d_bf16_params.h 700 0xe7a 1
+conv2d_bf16_params.h 558 0xe80 x
+conv2d_bf16_params.h 700 0xe86 x
+conv2d_bf16_params.h 578 0xe8a x
+conv2d_bf16_params.h 559 0xe8e x
+conv2d_bf16_params.h 578 0xe92 x
+conv2d_bf16_params.h 610 0xe96 x
+conv2d_bf16_params.h 611 0xe96 1
+conv2d_bf16_params.h 621 0xe96 2
+conv2d_bf16_params.h 621 0xe96 3
+conv2d_bf16_params.h 629 0xe96 4
+conv2d_bf16_params.h 621 0xea2
+conv2d_bf16_params.h 621 0xea2 1 x
+conv2d_bf16_params.h 645 0xea2 2
+conv2d_bf16_params.h 649 0xea2 3
+conv2d_bf16_params.h 645 0xea8
+conv2d_bf16_params.h 554 0xeae x
+conv2d_bf16_params.h 645 0xeae 1 x
+conv2d_bf16_params.h 554 0xeb8
+conv2d_bf16_params.h 555 0xeb8 1
+conv2d_bf16_params.h 555 0xeb8 2 x
+conv2d_bf16_params.h 645 0xeb8 3
+conv2d_bf16_params.h 555 0xec4
+conv2d_bf16_params.h 621 0xec4 1
+conv2d_bf16_params.h 621 0xec4 2 x
+conv2d_bf16_params.h 645 0xec4 3
+conv2d_bf16_params.h 558 0xece x
+conv2d_bf16_params.h 559 0xece 1
+conv2d_bf16_params.h 621 0xece 2
+conv2d_bf16_params.h 621 0xece 3
+conv2d_bf16_params.h 645 0xece 4
+conv2d_bf16_params.h 559 0xeda x
+conv2d_bf16_params.h 621 0xeda 1 x
+conv2d_bf16_params.h 645 0xeda 2 x
+conv2d_bf16_params.h 610 0xee0 x
+conv2d_bf16_params.h 621 0xee0 1
+conv2d_bf16_params.h 655 0xee0 2
+conv2d_bf16_params.h 679 0xee0 3
+conv2d_bf16_params.h 621 0xeec
+conv2d_bf16_params.h 649 0xeec 1
+conv2d_bf16_params.h 655 0xeec 2 x
+conv2d_bf16_params.h 661 0xeec 3
+conv2d_bf16_params.h 127 0xef6 x
+conv2d_bf16_params.h 127 0xef6 1 x
+conv2d_bf16_params.h 621 0xef6 2
+conv2d_bf16_params.h 649 0xef6 3
+conv2d_bf16_params.h 655 0xef6 4
+conv2d_bf16_params.h 679 0xef6 5
+conv2d_bf16_params.h 710 0xef6 6
+conv2d_bf16_params.h 710 0xef6 7
+conv2d_bf16_params.h 655 0xf00 x
+conv2d_bf16_params.h 679 0xf00 1 x
+conv2d_bf16_params.h 621 0xf06 x
+conv2d_bf16_params.h 649 0xf06 1 x
+conv2d_bf16_params.h 655 0xf06 2
+conv2d_bf16_params.h 655 0xf06 3
+conv2d_bf16_params.h 700 0xf06 4
+conv2d_bf16_params.h 700 0xf06 5
+conv2d_bf16_params.h 655 0xf10 x
+conv2d_bf16_params.h 700 0xf10 1 x
+conv2d_bf16_params.h 629 0xf14 x
+conv2d_bf16_params.h 611 0xf18 x
+conv2d_bf16_params.h 643 0xf26 x
+conv2d_bf16_params.h 664 0xf2a
+conv2d_bf16_params.h 621 0xf30 x
+conv2d_bf16_params.h 629 0xf30 1
+conv2d_bf16_params.h 684 0xf30 2 x
+conv2d_bf16_params.h 629 0xf3a x
+conv2d_bf16_params.h 127 0xf40 x
+conv2d_bf16_params.h 644 0xf40 1
+conv2d_bf16_params.h 700 0xf40 2 x
+conv2d_bf16_params.h 705 0xf40 3
+conv2d_bf16_params.h 705 0xf40 4
+conv2d_bf16_params.h 645 0xf4a x
+conv2d_bf16_params.h 700 0xf4a 1
+conv2d_bf16_params.h 700 0xf4a 2
+conv2d_bf16_params.h 705 0xf4a 3
+conv2d_bf16_params.h 644 0xf54
+conv2d_bf16_params.h 649 0xf54 1 x
+conv2d_bf16_params.h 674 0xf54 2
+conv2d_bf16_params.h 644 0xf5e x
+conv2d_bf16_params.h 662 0xf5e 1
+conv2d_bf16_params.h 664 0xf5e 2 x
+conv2d_bf16_params.h 127 0xf68 x
+conv2d_bf16_params.h 663 0xf68 1 x
+conv2d_bf16_params.h 664 0xf68 2
+conv2d_bf16_params.h 126 0xf6e x
+conv2d_bf16_params.h 664 0xf6e 1 x
+conv2d_bf16_params.h 126 0xf74
+conv2d_bf16_params.h 664 0xf74 1
+conv2d_bf16_params.h 127 0xf7a x
+conv2d_bf16_params.h 127 0xf7a 1 x
+conv2d_bf16_params.h 664 0xf7a 2
+conv2d_bf16_params.h 664 0xf7a 3
+conv2d_bf16_params.h 675 0xf7a 4
+conv2d_bf16_params.h 696 0xf7a 5
+conv2d_bf16_params.h 644 0xf84 x
+conv2d_bf16_params.h 664 0xf84 1 x
+conv2d_bf16_params.h 705 0xf84 2
+conv2d_bf16_params.h 664 0xf8e
+conv2d_bf16_params.h 705 0xf8e 1 x
+conv2d_bf16_params.h 705 0xf8e 2 x
+conv2d_bf16_params.h 127 0xf94
+conv2d_bf16_params.h 674 0xf94 1 x
+conv2d_bf16_params.h 675 0xf94 2 x
+conv2d_bf16_params.h 682 0xf94 3
+conv2d_bf16_params.h 718 0xf94 4
+conv2d_bf16_params.h 720 0xf94 5
+conv2d_bf16_params.h 127 0xf9e x
+conv2d_bf16_params.h 642 0xf9e 1
+conv2d_bf16_params.h 675 0xf9e 2
+conv2d_bf16_params.h 675 0xfa8 x
+conv2d_bf16_params.h 707 0xfa8 1 x
+conv2d_bf16_params.h 642 0xfae
+conv2d_bf16_params.h 674 0xfae 1 x
+conv2d_bf16_params.h 675 0xfae 2
+conv2d_bf16_params.h 642 0xfb8 x
+conv2d_bf16_params.h 655 0xfb8 1
+conv2d_bf16_params.h 655 0xfb8 2
+conv2d_bf16_params.h 675 0xfb8 3 x
+conv2d_bf16_params.h 679 0xfb8 4
+conv2d_bf16_params.h 679 0xfb8 5
+conv2d_bf16_params.h 655 0xfc4 x
+conv2d_bf16_params.h 679 0xfc4 1 x
+conv2d_bf16_params.h 713 0xfc4 2
+conv2d_bf16_params.h 691 0xfca x
+conv2d_bf16_params.h 675 0xfce
+conv2d_bf16_params.h 675 0xfce 1 x
+conv2d_bf16_params.h 709 0xfce 2 x
+conv2d_bf16_params.h 675 0xfd8
+conv2d_bf16_params.h 706 0xfd8 1 x
+conv2d_bf16_params.h 706 0xfd8 2
+conv2d_bf16_params.h 709 0xfd8 3
+conv2d_bf16_params.h 682 0xfe4 x
+conv2d_bf16_params.h 706 0xfe4 1
+conv2d_bf16_params.h 126 0xfea x
+conv2d_bf16_params.h 696 0xfea 1 x
+conv2d_bf16_params.h 127 0xff0 x
+conv2d_bf16_params.h 127 0xff0 1 x
+conv2d_bf16_params.h 696 0xff0 2
+conv2d_bf16_params.h 696 0xff6 x
+conv2d_bf16_params.h 713 0xff6 1 x
+conv2d_bf16_params.h 696 0xffc
+conv2d_bf16_params.h 706 0xffc 1
+conv2d_bf16_params.h 706 0xffc 2 x
+conv2d_bf16_params.h 706 0x1006
+conv2d_bf16_params.h 696 0x100a x
+conv2d_bf16_params.h 707 0x100a 1 x
+conv2d_bf16_params.h 696 0x1010
+conv2d_bf16_params.h 709 0x1010 1 x
+conv2d_bf16_params.h 696 0x1016 x
+conv2d_bf16_params.h 709 0x1016 1
+conv2d_bf16_params.h 707 0x1020 x
+conv2d_bf16_params.h 708 0x1020 1
+conv2d_bf16_params.h 710 0x1020 2 x
+conv2d_bf16_params.h 710 0x1020 3 x
+conv2d_bf16_params.h 708 0x102c x
+conv2d_bf16_params.h 713 0x102c 1 x
+conv2d_bf16_params.h 709 0x1036 x
+conv2d_bf16_params.h 800 0x1036 1 x
+conv2d_bf16_params.h 710 0x103c x
+conv2d_bf16_params.h 718 0x1044 x
+conv2d_bf16_params.h 718 0x1048
+conv2d_bf16_params.h 720 0x104c x
+conv2d_bf16_params.h 800 0x104c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1060
+utils.h 531 0x1060 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 689 0x1060 2 x
+conv2d_bf16.h 698 0x1060 3
+conv2d_bf16.h 704 0x1060 4
+conv2d_bf16.h 707 0x1060 5
+conv2d_bf16.h 707 0x1060 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x106c
+utils.h 526 0x106c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 698 0x106c 2 x
+conv2d_bf16.h 704 0x106c 3 x
+conv2d_bf16.h 707 0x106c 4
+conv2d_bf16.h 707 0x106c 5
+conv2d_bf16.h 698 0x107a
+conv2d_bf16.h 702 0x107a 1
+conv2d_bf16.h 698 0x1084
+conv2d_bf16.h 702 0x1084 1 x
+conv2d_bf16.h 699 0x108e x
+conv2d_bf16.h 702 0x108e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1098
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 702 0x1098 1 x
+conv2d_bf16.h 702 0x109e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x10a6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10a6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 704 0x10b0 x
+conv2d_bf16.h 702 0x10b4 x
+conv2d_bf16.h 705 0x10b4 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ba x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x10ba 1
+conv2d_bf16.h 707 0x10ba 2
+conv2d_bf16.h 704 0x10c0 x
+conv2d_bf16.h 705 0x10c6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10d0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x10e0 1 x
+conv2d_bf16.h 704 0x10f0 x
+conv2d_bf16.h 705 0x1100 x
+conv2d_bf16.h 707 0x1100 1 x
+conv2d_bf16.h 707 0x1100 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1110 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1110 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1110 2
+conv2d_bf16.h 708 0x1110 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1120
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1120 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x1120 2 x
+conv2d_bf16.h 707 0x1132 x
+conv2d_bf16.h 707 0x1132 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1136 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1136 1 x
+conv2d_bf16.h 708 0x1136 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x113e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x113e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1142 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1146 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1146 1 x
+conv2d_bf16.h 707 0x1146 2 x
+conv2d_bf16.h 707 0x1146 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x114e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x114e 1 x
+conv2d_bf16.h 708 0x114e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1156 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x115a x
+conv2d_bf16.h 707 0x115a 1 x
+conv2d_bf16.h 723 0x115a 2 x
+conv2d_bf16.h 708 0x1160 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1164 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1170
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x1170 1
+conv2d_bf16.h 1836 0x1170 2 x
+conv2d_bf16.h 1836 0x1170 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 240 0x1170 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x117e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 241 0x117e 1
+conv2d_bf16_params.h 242 0x117e 2
+conv2d_bf16_params.h 250 0x117e 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 866 0x118a
+conv2d_bf16.h 876 0x118a 1
+conv2d_bf16.h 876 0x118a 2
+conv2d_bf16.h 881 0x118a 3
+conv2d_bf16.h 1836 0x118a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 242 0x118a 5
+conv2d_bf16_params.h 242 0x118a 6
+conv2d_bf16_params.h 242 0x118a 7
+conv2d_bf16_params.h 242 0x118a 8
+conv2d_bf16_params.h 242 0x118a 9
+conv2d_bf16_params.h 243 0x118a 10
+conv2d_bf16_params.h 245 0x118a 11
+conv2d_bf16_params.h 250 0x118a 12
+conv2d_bf16_params.h 250 0x118a 13
+conv2d_bf16_params.h 240 0x1196
+conv2d_bf16_params.h 240 0x1196 1 x
+conv2d_bf16_params.h 242 0x11a2
+conv2d_bf16_params.h 245 0x11a2 1
+conv2d_bf16_params.h 242 0x11ae
+conv2d_bf16_params.h 244 0x11ae 1
+conv2d_bf16_params.h 244 0x11ae 2
+conv2d_bf16_params.h 249 0x11ae 3
+conv2d_bf16_params.h 243 0x11ba
+conv2d_bf16_params.h 244 0x11ba 1
+conv2d_bf16_params.h 250 0x11ba 2
+conv2d_bf16_params.h 244 0x11c6
+conv2d_bf16_params.h 240 0x11d4
+conv2d_bf16_params.h 240 0x11d8
+conv2d_bf16_params.h 241 0x11d8 1 x
+conv2d_bf16_params.h 242 0x11de x
+conv2d_bf16_params.h 242 0x11de 1 x
+conv2d_bf16_params.h 245 0x11e4 x
+conv2d_bf16_params.h 242 0x11f2 x
+conv2d_bf16_params.h 242 0x11f6
+conv2d_bf16_params.h 242 0x11fa
+conv2d_bf16_params.h 241 0x11fe x
+conv2d_bf16_params.h 242 0x11fe 1
+conv2d_bf16_params.h 242 0x1204 x
+conv2d_bf16_params.h 242 0x1208
+conv2d_bf16_params.h 242 0x120c
+conv2d_bf16_params.h 242 0x1210
+conv2d_bf16_params.h 242 0x1210 1
+conv2d_bf16_params.h 242 0x1216
+conv2d_bf16_params.h 243 0x121a x
+conv2d_bf16_params.h 242 0x121e x
+conv2d_bf16_params.h 243 0x121e 1
+conv2d_bf16_params.h 244 0x1224 x
+conv2d_bf16_params.h 245 0x1224 1 x
+conv2d_bf16_params.h 244 0x1236
+conv2d_bf16_params.h 244 0x1236 1
+conv2d_bf16_params.h 245 0x123c
+conv2d_bf16_params.h 244 0x1242
+conv2d_bf16_params.h 244 0x1246
+conv2d_bf16_params.h 244 0x124a
+conv2d_bf16_params.h 244 0x124e
+conv2d_bf16_params.h 244 0x1252
+conv2d_bf16_params.h 245 0x1256
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 876 0x1268
+conv2d_bf16.h 876 0x1268 1
+conv2d_bf16.h 1849 0x1276
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 250 0x1280 x
+conv2d_bf16_params.h 250 0x1280 1
+conv2d_bf16_params.h 250 0x128c
+conv2d_bf16_params.h 250 0x1290
+conv2d_bf16_params.h 250 0x1294
+conv2d_bf16_params.h 250 0x1298
+conv2d_bf16_params.h 250 0x1298 1
+conv2d_bf16_params.h 250 0x129e
+conv2d_bf16_params.h 249 0x12a2 x
+conv2d_bf16_params.h 249 0x12a6
+conv2d_bf16_params.h 250 0x12aa x
+conv2d_bf16_params.h 258 0x12b0 x
+conv2d_bf16_params.h 259 0x12c8
+conv2d_bf16_params.h 259 0x12ce x
+conv2d_bf16_params.h 259 0x12d2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x12e0 x
+conv2d_bf16.h 1849 0x12e0 1
+conv2d_bf16.h 1849 0x12e0 2 x
+conv2d_bf16.h 876 0x12ea
+conv2d_bf16.h 881 0x12ea 1
+conv2d_bf16.h 1841 0x12ea 2
+conv2d_bf16.h 1842 0x12ea 3
+conv2d_bf16.h 1842 0x12ea 4
+conv2d_bf16.h 1842 0x12ea 5
+conv2d_bf16.h 1845 0x12ea 6
+conv2d_bf16.h 1849 0x12ea 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x12ea 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x12f4 x
+conv2d_bf16.h 1842 0x12f4 1
+conv2d_bf16.h 1849 0x12f4 2
+conv2d_bf16.h 862 0x1300
+conv2d_bf16.h 1842 0x1300 1
+conv2d_bf16.h 1845 0x1300 2
+conv2d_bf16.h 1845 0x130c x
+conv2d_bf16.h 862 0x1310 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1314 x
+io_buffer_main.h 125 0x1318
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x1318 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x131e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x1322 x
+conv2d_bf16.h 1845 0x1328 x
+conv2d_bf16.h 866 0x132c x
+conv2d_bf16.h 866 0x1330
+conv2d_bf16.h 1842 0x1336 x
+conv2d_bf16.h 1842 0x1336 1 x
+conv2d_bf16.h 1842 0x133c
+conv2d_bf16.h 1845 0x133c 1 x
+conv2d_bf16.h 1841 0x1342 x
+conv2d_bf16.h 881 0x134a
+conv2d_bf16.h 885 0x134a 1
+conv2d_bf16.h 1845 0x134e x
+conv2d_bf16.h 867 0x1352
+conv2d_bf16.h 867 0x1358
+conv2d_bf16.h 867 0x1358 1 x
+conv2d_bf16.h 867 0x1360
+conv2d_bf16.h 867 0x1366
+conv2d_bf16.h 867 0x1372
+conv2d_bf16.h 867 0x1372 1
+conv2d_bf16.h 867 0x1378
+conv2d_bf16.h 867 0x137c
+conv2d_bf16.h 867 0x1382
+conv2d_bf16.h 867 0x138a
+conv2d_bf16.h 881 0x13a0
+conv2d_bf16.h 883 0x13a0 1
+conv2d_bf16.h 884 0x13a0 2
+conv2d_bf16.h 876 0x13ac x
+conv2d_bf16.h 876 0x13ac 1 x
+conv2d_bf16.h 881 0x13ac 2 x
+conv2d_bf16.h 883 0x13ac 3
+conv2d_bf16.h 884 0x13ac 4
+conv2d_bf16.h 885 0x13b8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13b8 1
+conv2d_bf16_params.h 243 0x13b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 883 0x13c2 x
+conv2d_bf16.h 884 0x13c8 x
+conv2d_bf16.h 876 0x13ce x
+conv2d_bf16.h 876 0x13d2
+conv2d_bf16.h 881 0x13d6 x
+conv2d_bf16.h 881 0x13da
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13da 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 881 0x13e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 885 0x13f0
+conv2d_bf16.h 885 0x13f4 x
+conv2d_bf16.h 885 0x13fe
+conv2d_bf16.h 885 0x1402
+conv2d_bf16.h 885 0x1406
+conv2d_bf16.h 896 0x1410
+conv2d_bf16.h 1115 0x1410 1
+conv2d_bf16.h 1115 0x1410 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x141a
+vector.hpp 1152 0x141a 1
+vector.hpp 1152 0x141a 2
+vector.hpp 1152 0x141a 3
+vector.hpp 1152 0x141a 4
+vector.hpp 1152 0x141a 5
+vector.hpp 1152 0x141a 6
+vector.hpp 1152 0x141a 7
+vector.hpp 1152 0x141a 8
+vector.hpp 1152 0x141a 9
+vector.hpp 1152 0x141a 10
+vector.hpp 1152 0x141a 11
+vector.hpp 1152 0x141a 12
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x141a 13
+accum.hpp 149 0x141a 14
+accum.hpp 149 0x141a 15
+accum.hpp 149 0x141a 16
+accum.hpp 149 0x141a 17
+accum.hpp 149 0x141a 18
+accum.hpp 149 0x141a 19
+accum.hpp 149 0x141a 20
+accum.hpp 149 0x141a 21
+accum.hpp 149 0x141a 22
+accum.hpp 149 0x141a 23
+accum.hpp 149 0x141a 24
+accum.hpp 149 0x141a 25
+accum.hpp 149 0x141a 26
+accum.hpp 149 0x141a 27
+accum.hpp 149 0x141a 28
+accum.hpp 1110 0x141a 29
+accum.hpp 1110 0x141a 30
+accum.hpp 1110 0x141a 31
+accum.hpp 1110 0x141a 32
+accum.hpp 1110 0x141a 33
+accum.hpp 1110 0x141a 34
+accum.hpp 1110 0x141a 35
+accum.hpp 1110 0x141a 36
+accum.hpp 1110 0x141a 37
+accum.hpp 1110 0x141a 38
+accum.hpp 1110 0x141a 39
+accum.hpp 1110 0x141a 40
+accum.hpp 1110 0x141a 41
+accum.hpp 1110 0x141a 42
+accum.hpp 1110 0x141a 43
+accum.hpp 1110 0x141a 44
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 886 0x141a 45
+conv2d_bf16.h 896 0x141a 46 x
+conv2d_bf16.h 1123 0x141a 47
+conv2d_bf16.h 896 0x1420
+conv2d_bf16.h 896 0x1424
+conv2d_bf16.h 896 0x1428
+conv2d_bf16.h 896 0x142c
+conv2d_bf16.h 896 0x1430
+conv2d_bf16.h 896 0x1434
+conv2d_bf16.h 897 0x1438 x
+conv2d_bf16.h 897 0x143c
+conv2d_bf16.h 897 0x1440
+conv2d_bf16.h 897 0x1444
+conv2d_bf16.h 897 0x1448
+conv2d_bf16.h 897 0x144c
+conv2d_bf16.h 897 0x1450
+conv2d_bf16.h 898 0x1454 x
+conv2d_bf16.h 898 0x1458
+conv2d_bf16.h 898 0x145c
+conv2d_bf16.h 898 0x1460
+conv2d_bf16.h 898 0x1464
+conv2d_bf16.h 898 0x1468
+conv2d_bf16.h 1115 0x146c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1470
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 898 0x1474 x
+conv2d_bf16.h 1115 0x1480 x
+conv2d_bf16.h 1115 0x1484
+conv2d_bf16.h 886 0x148a
+conv2d_bf16.h 886 0x1490 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1494 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x149c
+conv2d_bf16.h 1123 0x149c 1
+conv2d_bf16.h 1123 0x149c 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14a6
+aie_core.h 100 0x14a6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14a6 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14a6 3
+accum.hpp 946 0x14a6 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14a6 5
+conv2d_bf16.h 1125 0x14a6 6
+conv2d_bf16.h 1154 0x14a6 7
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14b0
+aie_core.h 100 0x14b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14b0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14b0 3
+accum.hpp 946 0x14b0 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14b0 5
+conv2d_bf16.h 1125 0x14b0 6
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14ba
+aie_core.h 100 0x14ba 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14ba 2
+vector.hpp 1152 0x14ba 3
+vector.hpp 1152 0x14ba 4
+vector.hpp 1152 0x14ba 5
+vector.hpp 1152 0x14ba 6
+vector.hpp 1152 0x14ba 7
+vector.hpp 1152 0x14ba 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x14ba 9
+accum.hpp 149 0x14ba 10
+accum.hpp 149 0x14ba 11
+accum.hpp 149 0x14ba 12
+accum.hpp 149 0x14ba 13
+accum.hpp 149 0x14ba 14
+accum.hpp 149 0x14ba 15
+accum.hpp 149 0x14ba 16
+accum.hpp 578 0x14ba 17
+accum.hpp 946 0x14ba 18
+accum.hpp 1110 0x14ba 19
+accum.hpp 1110 0x14ba 20
+accum.hpp 1110 0x14ba 21
+accum.hpp 1110 0x14ba 22
+accum.hpp 1110 0x14ba 23
+accum.hpp 1110 0x14ba 24
+accum.hpp 1110 0x14ba 25
+accum.hpp 1110 0x14ba 26
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x14ba 27
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14c6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x14c6 1
+conv2d_bf16.h 1187 0x14c6 2
+conv2d_bf16.h 1199 0x14c6 3
+conv2d_bf16.h 1200 0x14c6 4
+conv2d_bf16.h 1201 0x14c6 5
+conv2d_bf16.h 1202 0x14c6 6
+conv2d_bf16.h 1143 0x14d2
+conv2d_bf16.h 1218 0x14d2 1
+conv2d_bf16.h 749 0x14dc
+conv2d_bf16.h 750 0x14dc 1
+conv2d_bf16.h 751 0x14dc 2
+conv2d_bf16.h 752 0x14dc 3
+conv2d_bf16.h 1123 0x14dc 4
+conv2d_bf16.h 736 0x14e6
+conv2d_bf16.h 738 0x14e6 1
+conv2d_bf16.h 1123 0x14e6 2
+conv2d_bf16.h 1873 0x14e6 3
+conv2d_bf16.h 1125 0x14f2 x
+conv2d_bf16.h 1125 0x14f6
+conv2d_bf16.h 1125 0x14fa
+conv2d_bf16.h 1149 0x14fe x
+conv2d_bf16.h 1154 0x1502 x
+conv2d_bf16.h 743 0x1506 x
+conv2d_bf16.h 745 0x150a x
+conv2d_bf16.h 746 0x150e x
+conv2d_bf16.h 1125 0x150e 1 x
+conv2d_bf16.h 1143 0x1514 x
+conv2d_bf16.h 1206 0x1518 x
+conv2d_bf16.h 1149 0x151c
+conv2d_bf16.h 1154 0x1524
+conv2d_bf16.h 1125 0x1528 x
+conv2d_bf16.h 1149 0x152c x
+conv2d_bf16.h 1154 0x1530 x
+conv2d_bf16.h 1287 0x1536
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1540 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1540 1 x
+accum.hpp 946 0x1540 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x1540 3
+conv2d_bf16.h 738 0x1540 4
+conv2d_bf16.h 1147 0x1540 5 x
+conv2d_bf16.h 1187 0x1540 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x154c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x154c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x154c 2
+accum.hpp 946 0x154c 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x154c 4 x
+conv2d_bf16.h 738 0x154c 5 x
+conv2d_bf16.h 1188 0x154c 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1558
+aie_core.h 100 0x1558 1
+aie_core.h 100 0x1558 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1558 3
+vector.hpp 1139 0x1558 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1558 5
+accum.hpp 578 0x1558 6
+accum.hpp 946 0x1558 7
+accum.hpp 946 0x1558 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1558 9 x
+conv2d_bf16.h 742 0x1558 10 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1564
+aie_core.h 100 0x1564 1
+aie_core.h 100 0x1564 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1564 3
+vector.hpp 1139 0x1564 4
+vector.hpp 1139 0x1564 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1564 6
+accum.hpp 578 0x1564 7
+accum.hpp 578 0x1564 8 x
+accum.hpp 946 0x1564 9
+accum.hpp 946 0x1564 10
+accum.hpp 946 0x1564 11 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1564 12 x
+conv2d_bf16.h 1149 0x1564 13 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1570
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1570 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1570 2
+accum.hpp 946 0x1570 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1570 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x1570 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x157a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x157a 1 x
+accum.hpp 946 0x157a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x157a 3 x
+conv2d_bf16.h 1152 0x157a 4 x
+conv2d_bf16.h 1206 0x157a 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1586
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1586 1
+accum.hpp 946 0x1586 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1586 3 x
+conv2d_bf16.h 1154 0x1586 4 x
+conv2d_bf16.h 1206 0x1586 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1592 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1592 1 x
+accum.hpp 946 0x1592 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1592 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1598
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1598 1
+accum.hpp 946 0x1598 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1598 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1157 0x1598 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x159e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x159e 1 x
+accum.hpp 946 0x159e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x159e 3 x
+conv2d_bf16.h 1159 0x159e 4 x
+conv2d_bf16.h 737 0x15a4 x
+conv2d_bf16.h 738 0x15a4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15aa x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15aa 1 x
+accum.hpp 946 0x15aa 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x15aa 3 x
+conv2d_bf16.h 1192 0x15aa 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15b0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15b0 1
+accum.hpp 946 0x15b0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x15b0 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15b0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ba
+vector.hpp 1139 0x15ba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ba 2
+accum.hpp 578 0x15ba 3 x
+accum.hpp 946 0x15ba 4
+accum.hpp 946 0x15ba 5 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x15ba 6 x
+conv2d_bf16.h 746 0x15ba 7 x
+conv2d_bf16.h 1162 0x15ba 8
+conv2d_bf16.h 737 0x15c6 x
+conv2d_bf16.h 742 0x15c6 1 x
+conv2d_bf16.h 749 0x15c6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15d0 x
+aie_core.h 143 0x15d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15d0 2 x
+vector.hpp 1152 0x15d0 3
+vector.hpp 1152 0x15d0 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15d0 5 x
+accum.hpp 946 0x15d0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x15d0 7 x
+conv2d_bf16.h 1286 0x15d0 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15de
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15de 1
+vector.hpp 1139 0x15de 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15de 3
+accum.hpp 578 0x15de 4
+accum.hpp 946 0x15de 5
+accum.hpp 946 0x15de 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15de 7 x
+conv2d_bf16.h 751 0x15de 8 x
+conv2d_bf16.h 1162 0x15de 9 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15ec
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ec 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ec 2
+accum.hpp 946 0x15ec 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x15ec 4 x
+conv2d_bf16.h 746 0x15ec 5 x
+conv2d_bf16.h 1199 0x15ec 6 x
+conv2d_bf16.h 738 0x15fa x
+conv2d_bf16.h 1200 0x15fa 1 x
+conv2d_bf16.h 742 0x1602 x
+conv2d_bf16.h 1201 0x1602 1 x
+conv2d_bf16.h 743 0x160a x
+conv2d_bf16.h 752 0x160a 1 x
+conv2d_bf16.h 738 0x1612 x
+conv2d_bf16.h 740 0x1612 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1618 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x161c x
+conv2d_bf16.h 742 0x161c 1 x
+conv2d_bf16.h 1202 0x161c 2 x
+conv2d_bf16.h 1206 0x161c 3 x
+conv2d_bf16.h 737 0x1628 x
+conv2d_bf16.h 743 0x1628 1 x
+conv2d_bf16.h 749 0x1628 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1632
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1632 1 x
+conv2d_bf16.h 740 0x1632 2 x
+conv2d_bf16.h 751 0x1632 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1640 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1640 1 x
+conv2d_bf16.h 750 0x1640 2 x
+conv2d_bf16.h 736 0x1650 x
+conv2d_bf16.h 742 0x1650 1 x
+conv2d_bf16.h 746 0x1650 2 x
+conv2d_bf16.h 752 0x1650 3 x
+conv2d_bf16.h 737 0x1660 x
+conv2d_bf16.h 743 0x1660 1 x
+conv2d_bf16.h 749 0x1660 2 x
+conv2d_bf16.h 738 0x1670 x
+conv2d_bf16.h 740 0x1670 1 x
+conv2d_bf16.h 751 0x1670 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1680 1 x
+conv2d_bf16.h 750 0x1680 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1690
+aie_core.h 100 0x1690 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1690 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1690 3
+accum.hpp 946 0x1690 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 742 0x1690 5 x
+conv2d_bf16.h 746 0x1690 6 x
+conv2d_bf16.h 752 0x1690 7 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x169e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x169e 1
+vector.hpp 1152 0x169e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x169e 3 x
+conv2d_bf16.h 749 0x169e 4 x
+conv2d_bf16.h 1286 0x169e 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ac 1
+vector.hpp 1152 0x16ac 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16ac 3 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16b6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x16b6 1 x
+conv2d_bf16.h 750 0x16b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16c0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x16c0 1 x
+conv2d_bf16.h 752 0x16c0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ca
+vector.hpp 1152 0x16ca 1
+vector.hpp 1152 0x16ca 2
+vector.hpp 1152 0x16ca 3
+vector.hpp 1152 0x16ca 4
+vector.hpp 1152 0x16ca 5
+vector.hpp 1152 0x16ca 6
+vector.hpp 1152 0x16ca 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16ca 8 x
+conv2d_bf16.h 1285 0x16ca 9 x
+conv2d_bf16.h 1286 0x16ca 10
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x16d6
+aie_core.h 100 0x16d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x16d6 2
+vector.hpp 1152 0x16d6 3
+vector.hpp 1152 0x16d6 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x16d6 5
+accum.hpp 946 0x16d6 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16d6 7 x
+conv2d_bf16.h 746 0x16e0 x
+conv2d_bf16.h 750 0x16e0 1 x
+conv2d_bf16.h 745 0x16e8 x
+conv2d_bf16.h 752 0x16e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16f0
+aie_core.h 143 0x16f4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x16f4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16fc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16fc 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1704 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 752 0x1704 1 x
+conv2d_bf16.h 1286 0x1704 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x170e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x170e 1
+vector.hpp 1152 0x170e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x170e 3 x
+conv2d_bf16.h 1286 0x170e 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x171a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x171a 1
+vector.hpp 1152 0x171a 2
+vector.hpp 1152 0x171a 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x171a 4
+accum.hpp 946 0x171a 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1722
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1722 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x172a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x172a 1 x
+accum.hpp 1110 0x172a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1732
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1732 1
+accum.hpp 1110 0x1732 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x173a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x173a 1
+conv2d_bf16.h 1287 0x173a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1744 x
+accum.hpp 1110 0x1744 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1744 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x174c
+accum.hpp 1110 0x174c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1218 0x174c 2 x
+conv2d_bf16.h 1287 0x174c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1758 x
+accum.hpp 1110 0x1758 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1758 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1760
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1760 1
+accum.hpp 1110 0x1760 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1768
+vector.hpp 1152 0x1768 1
+vector.hpp 1152 0x1768 2
+vector.hpp 1152 0x1768 3
+vector.hpp 1152 0x1768 4
+vector.hpp 1152 0x1768 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1768 6
+accum.hpp 1110 0x1768 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1772
+vector.hpp 1152 0x1772 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1772 2 x
+accum.hpp 1110 0x1772 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1286 0x1772 4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x177a
+aie_core.h 143 0x177a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x177a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x177a 3
+accum.hpp 946 0x177a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1187 0x177a 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1782 x
+max_min.hpp 20 0x1786
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x178a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x178a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1792
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1792 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x179a x
+vector.hpp 1152 0x17a4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17a4 1 x
+max_min.hpp 20 0x17ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17c0 x
+vector.hpp 1152 0x17d0
+vector.hpp 1152 0x17d4
+vector.hpp 1152 0x17d8
+vector.hpp 1152 0x17dc
+vector.hpp 1152 0x17e0
+vector.hpp 1152 0x17e4
+vector.hpp 1152 0x17e8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17f0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17f0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1143 0x17f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17fc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x17fc 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x17fc 2
+accum.hpp 946 0x17fc 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1800
+aie_core.h 100 0x1804 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1804 1
+vector.hpp 1152 0x1804 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x180a
+aie_core.h 143 0x1820
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x1820 1
+conv2d_bf16.h 1364 0x1820 2
+conv2d_bf16.h 1364 0x1820 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x182c
+aie_core.h 143 0x182c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x182c 2
+vector.hpp 1152 0x182c 3
+vector.hpp 1152 0x182c 4
+vector.hpp 1152 0x182c 5
+vector.hpp 1152 0x182c 6
+vector.hpp 1152 0x182c 7
+vector.hpp 1152 0x182c 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x182c 9
+accum.hpp 149 0x182c 10
+accum.hpp 149 0x182c 11
+accum.hpp 149 0x182c 12
+accum.hpp 149 0x182c 13
+accum.hpp 149 0x182c 14
+accum.hpp 149 0x182c 15
+accum.hpp 149 0x182c 16
+accum.hpp 1110 0x182c 17
+accum.hpp 1110 0x182c 18
+accum.hpp 1110 0x182c 19
+accum.hpp 1110 0x182c 20
+accum.hpp 1110 0x182c 21
+accum.hpp 1110 0x182c 22
+accum.hpp 1110 0x182c 23
+accum.hpp 1110 0x182c 24
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x182c 25
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1838
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 807 0x1838 1
+conv2d_bf16.h 808 0x1838 2
+conv2d_bf16.h 809 0x1838 3
+conv2d_bf16.h 810 0x1838 4
+conv2d_bf16.h 1436 0x1838 5
+conv2d_bf16.h 1437 0x1838 6
+conv2d_bf16.h 1438 0x1838 7
+conv2d_bf16.h 1439 0x1838 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1842
+aie_core.h 143 0x1842 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 802 0x1842 2
+conv2d_bf16.h 1428 0x1842 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x184e
+aie_core.h 143 0x184e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x184e 2
+conv2d_bf16.h 794 0x184e 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x185a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 794 0x185a 1
+conv2d_bf16.h 1455 0x185a 2
+conv2d_bf16.h 1337 0x1864
+conv2d_bf16.h 1364 0x186e x
+conv2d_bf16.h 1873 0x186e 1
+conv2d_bf16.h 1364 0x1874
+conv2d_bf16.h 1369 0x1878 x
+conv2d_bf16.h 799 0x187c x
+conv2d_bf16.h 801 0x1880 x
+conv2d_bf16.h 802 0x1884 x
+conv2d_bf16.h 1337 0x1888 x
+conv2d_bf16.h 1443 0x188c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1892
+vector.hpp 1152 0x1892 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x1892 2
+conv2d_bf16.h 1364 0x1896
+conv2d_bf16.h 1518 0x1896 1
+conv2d_bf16.h 1364 0x189a
+conv2d_bf16.h 1364 0x189e x
+conv2d_bf16.h 1369 0x18a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x18a8
+vector.hpp 1152 0x18a8 1
+vector.hpp 1139 0x18b0
+vector.hpp 1139 0x18b0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18b0 2
+accum.hpp 578 0x18b0 3
+accum.hpp 578 0x18b0 4 x
+accum.hpp 946 0x18b0 5
+accum.hpp 946 0x18b0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18b0 7 x
+conv2d_bf16.h 1362 0x18b0 8 x
+conv2d_bf16.h 1429 0x18b0 9
+conv2d_bf16.h 1443 0x18b0 10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18be
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18be 1
+accum.hpp 946 0x18be 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18be 3 x
+conv2d_bf16.h 1364 0x18be 4 x
+conv2d_bf16.h 1443 0x18be 5
+conv2d_bf16.h 794 0x18ca x
+conv2d_bf16.h 795 0x18ca 1 x
+conv2d_bf16.h 1428 0x18ca 2 x
+conv2d_bf16.h 1443 0x18ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18d6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18d6 1
+accum.hpp 578 0x18d6 2
+accum.hpp 946 0x18d6 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18d6 4 x
+conv2d_bf16.h 799 0x18d6 5 x
+conv2d_bf16.h 1429 0x18d6 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e0 1 x
+accum.hpp 946 0x18e0 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x18e0 3 x
+conv2d_bf16.h 1367 0x18e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e6 1
+accum.hpp 946 0x18e6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x18e6 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x18e6 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18ec x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18ec 1 x
+accum.hpp 946 0x18ec 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18ec 3 x
+conv2d_bf16.h 1372 0x18ec 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f2 1
+accum.hpp 946 0x18f2 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18f2 3 x
+conv2d_bf16.h 1374 0x18f2 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f8 1 x
+accum.hpp 946 0x18f8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 795 0x18f8 3 x
+conv2d_bf16.h 1377 0x18f8 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18fe
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18fe 1
+accum.hpp 946 0x18fe 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18fe 3 x
+conv2d_bf16.h 1379 0x18fe 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1904 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1904 1 x
+accum.hpp 946 0x1904 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x1904 3 x
+conv2d_bf16.h 1429 0x1904 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x190a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x190a 1
+accum.hpp 946 0x190a 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x190a 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x190a 4 x
+conv2d_bf16.h 1429 0x190a 5
+conv2d_bf16.h 792 0x1914 x
+conv2d_bf16.h 794 0x1914 1 x
+conv2d_bf16.h 802 0x1914 2 x
+conv2d_bf16.h 793 0x191e x
+conv2d_bf16.h 799 0x191e 1 x
+conv2d_bf16.h 803 0x191e 2 x
+conv2d_bf16.h 807 0x191e 3 x
+conv2d_bf16.h 794 0x192a x
+conv2d_bf16.h 804 0x192a 1 x
+conv2d_bf16.h 808 0x192a 2 x
+conv2d_bf16.h 809 0x1934 x
+conv2d_bf16.h 810 0x1938 x
+conv2d_bf16.h 795 0x193c x
+conv2d_bf16.h 802 0x193c 1 x
+conv2d_bf16.h 1437 0x193c 2 x
+conv2d_bf16.h 796 0x1946 x
+conv2d_bf16.h 1436 0x1946 1 x
+conv2d_bf16.h 797 0x194e x
+conv2d_bf16.h 1438 0x194e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1956 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1956 1 x
+conv2d_bf16.h 1439 0x1956 2 x
+conv2d_bf16.h 792 0x1960 x
+conv2d_bf16.h 801 0x1960 1 x
+conv2d_bf16.h 793 0x1966 x
+conv2d_bf16.h 804 0x1966 1 x
+conv2d_bf16.h 808 0x1966 2 x
+conv2d_bf16.h 795 0x1970 x
+conv2d_bf16.h 803 0x1970 1 x
+conv2d_bf16.h 807 0x1970 2 x
+conv2d_bf16.h 796 0x197a x
+conv2d_bf16.h 810 0x197a 1 x
+conv2d_bf16.h 794 0x1982 x
+conv2d_bf16.h 797 0x1982 1 x
+conv2d_bf16.h 809 0x1982 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1990 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1990 1 x
+conv2d_bf16.h 802 0x1990 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19a0 1
+vector.hpp 1152 0x19a0 2
+vector.hpp 1152 0x19a0 3
+vector.hpp 1152 0x19a0 4
+vector.hpp 1152 0x19a0 5
+vector.hpp 1152 0x19a0 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x19a0 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19ac
+vector.hpp 1152 0x19ac 1
+vector.hpp 1152 0x19ac 2
+vector.hpp 1152 0x19ac 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 804 0x19ac 4 x
+conv2d_bf16.h 808 0x19ac 5 x
+conv2d_bf16.h 1517 0x19ac 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19b8
+vector.hpp 1152 0x19b8 1
+vector.hpp 1152 0x19b8 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 803 0x19b8 3 x
+conv2d_bf16.h 807 0x19b8 4 x
+conv2d_bf16.h 1518 0x19b8 5 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 810 0x19c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19cc x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 809 0x19cc 1 x
+conv2d_bf16.h 1428 0x19cc 2
+conv2d_bf16.h 801 0x19d6 x
+conv2d_bf16.h 802 0x19da x
+conv2d_bf16.h 803 0x19de x
+conv2d_bf16.h 807 0x19de 1 x
+conv2d_bf16.h 804 0x19e6 x
+conv2d_bf16.h 808 0x19e6 1 x
+conv2d_bf16.h 809 0x19ee x
+conv2d_bf16.h 810 0x19f2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x19fa x
+accum.hpp 1110 0x19fa 1 x
+accum.hpp 149 0x19fe
+accum.hpp 1110 0x19fe 1
+accum.hpp 149 0x1a02
+accum.hpp 1110 0x1a02 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1455 0x1a02 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1a0c x
+accum.hpp 1110 0x1a0c 1 x
+accum.hpp 149 0x1a10
+accum.hpp 1110 0x1a10 1
+accum.hpp 149 0x1a14
+accum.hpp 1110 0x1a14 1
+accum.hpp 149 0x1a18
+accum.hpp 1110 0x1a18 1
+accum.hpp 149 0x1a1c
+accum.hpp 1110 0x1a1c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a20 x
+max_min.hpp 20 0x1a24
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a28 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a28 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a30
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a30 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a38 x
+vector.hpp 1152 0x1a42
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a42 1 x
+max_min.hpp 20 0x1a4a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a4e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a4e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a56
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a56 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a60 x
+vector.hpp 1152 0x1a70
+vector.hpp 1152 0x1a74
+vector.hpp 1152 0x1a78
+vector.hpp 1152 0x1a7c
+vector.hpp 1152 0x1a80
+vector.hpp 1152 0x1a84
+vector.hpp 1152 0x1a88
+vector.hpp 1152 0x1a90
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1337 0x1a90 1 x
+conv2d_bf16.h 1873 0x1ac8 x
+conv2d_bf16.h 1873 0x1acc
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 74 0x1ae0 x
+superkernels.cpp 79 0x1ae0 1
+superkernels.cpp 81 0x1ae0 2
+superkernels.cpp 79 0x1aea x
+superkernels.cpp 81 0x1aea 1
+superkernels.cpp 74 0x1af4
+superkernels.cpp 79 0x1b06
+superkernels.cpp 79 0x1b06 1
+superkernels.cpp 81 0x1b1c
+superkernels.cpp 113 0x1b22
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b22 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b2c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b2c 1
+tile.hpp 86 0x1b2c 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b3c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b44
+tile.hpp 74 0x1b48
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1b4c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b4c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b54
+superkernels.cpp 81 0x1b60
+superkernels.cpp 87 0x1b64
+superkernels.cpp 87 0x1b64 1 x
+superkernels.cpp 88 0x1b6e x
+superkernels.cpp 89 0x1b6e 1
+superkernels.cpp 88 0x1b78
+superkernels.cpp 88 0x1b7e
+superkernels.cpp 87 0x1b86 x
+superkernels.cpp 113 0x1b86 1
+superkernels.cpp 88 0x1b8e x
+superkernels.cpp 88 0x1b94
+superkernels.cpp 89 0x1b9a x
+superkernels.cpp 89 0x1ba0
+superkernels.cpp 113 0x1ba0 1
+superkernels.cpp 106 0x1bb0
+superkernels.cpp 113 0x1bb0 1
+superkernels.cpp 117 0x1bb0 2
+superkernels.cpp 136 0x1bb0 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bb0 4
+io_buffer_main.h 324 0x1bb0 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 106 0x1bba x
+superkernels.cpp 108 0x1bba 1
+superkernels.cpp 107 0x1bc4
+superkernels.cpp 108 0x1bc4 1 x
+superkernels.cpp 139 0x1bc4 2
+superkernels.cpp 140 0x1bc4 3
+superkernels.cpp 107 0x1bce x
+superkernels.cpp 110 0x1bda x
+superkernels.cpp 110 0x1bda 1 x
+superkernels.cpp 108 0x1be0 x
+superkernels.cpp 107 0x1be4 x
+superkernels.cpp 108 0x1be4 1
+superkernels.cpp 106 0x1bea x
+superkernels.cpp 106 0x1bee
+superkernels.cpp 107 0x1bf2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bf6 x
+io_buffer_main.h 218 0x1bfa
+io_buffer_main.h 218 0x1bfe
+io_buffer_main.h 218 0x1c02
+io_buffer_main.h 235 0x1c08 x
+io_buffer_main.h 218 0x1c14 x
+io_buffer_main.h 218 0x1c14 1 x
+io_buffer_main.h 218 0x1c18
+io_buffer_main.h 395 0x1c1c
+io_buffer_main.h 395 0x1c26 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1c30 x
+superkernels.cpp 113 0x1c36
+superkernels.cpp 113 0x1c42
+superkernels.cpp 117 0x1c50 x
+superkernels.cpp 117 0x1c50 1
+superkernels.cpp 117 0x1c5a
+superkernels.cpp 117 0x1c6c
+superkernels.cpp 117 0x1c70
+superkernels.cpp 136 0x1c76
+superkernels.cpp 140 0x1c76 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x1c82 x
+io_buffer_main.h 327 0x1c82 1
+io_buffer_main.h 425 0x1c82 2
+io_buffer_main.h 324 0x1c88
+io_buffer_main.h 425 0x1c98 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 136 0x1c9c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x1c9c 1 x
+io_buffer_main.h 327 0x1cae
+io_buffer_main.h 327 0x1cb2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 139 0x1cc0 x
+superkernels.cpp 139 0x1cc0 1
+superkernels.cpp 139 0x1cca
+superkernels.cpp 142 0x1cd2
+superkernels.cpp 139 0x1cde
+superkernels.cpp 139 0x1ce2
+superkernels.cpp 140 0x1cf4 x
+superkernels.cpp 142 0x1d04 x
+superkernels.cpp 142 0x1d08
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x1d20 x
+elementwise_binary.h 142 0x1d20 1
+elementwise_binary.h 144 0x1d20 2 x
+elementwise_binary.h 141 0x1d26
+elementwise_binary.h 141 0x1d2a
+elementwise_binary.h 142 0x1d2e x
+elementwise_binary.h 142 0x1d32
+elementwise_binary.h 130 0x1d40 x
+elementwise_binary.h 133 0x1d40 1 x
+elementwise_binary.h 130 0x1d44
+elementwise_binary.h 133 0x1d58 x
+elementwise_binary.h 134 0x1d5c x
+elementwise_binary.h 134 0x1d6c
+elementwise_binary.h 135 0x1d70 x
+elementwise_binary.h 135 0x1d80
+elementwise_binary.h 136 0x1d84 x
+elementwise_binary.h 137 0x1d8c x
+elementwise_binary.h 136 0x1d98 x
+elementwise_binary.h 137 0x1d9c
+elementwise_binary.h 137 0x1da0
+elementwise_binary.h 139 0x1da0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 146 0x1da0 2
+add_impl.h 146 0x1daa
+add_impl.h 147 0x1daa 1
+add_impl.h 147 0x1daa 2
+add_impl.h 146 0x1db4 x
+add_impl.h 147 0x1db4 1
+add_impl.h 147 0x1dbe x
+add_impl.h 147 0x1dc6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dca x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dce
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dd2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dd8 x
+add_impl.h 147 0x1ddc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 81 0x1df0
+elementwise_binary_broadcasting.h 81 0x1df0 1 x
+elementwise_binary_broadcasting.h 82 0x1df0 2
+elementwise_binary_broadcasting.h 82 0x1df0 3
+elementwise_binary_broadcasting.h 83 0x1df0 4
+elementwise_binary_broadcasting.h 81 0x1dfa
+elementwise_binary_broadcasting.h 82 0x1dfa 1
+elementwise_binary_broadcasting.h 82 0x1e00 x
+elementwise_binary_broadcasting.h 84 0x1e0e x
+elementwise_binary_broadcasting.h 82 0x1e12 x
+elementwise_binary_broadcasting.h 83 0x1e16 x
+elementwise_binary_broadcasting.h 82 0x1e1a x
+elementwise_binary_broadcasting.h 83 0x1e1a 1
+elementwise_binary_broadcasting.h 82 0x1e20
+elementwise_binary_broadcasting.h 82 0x1e24
+elementwise_binary_broadcasting.h 76 0x1e30
+elementwise_binary_broadcasting.h 76 0x1e30 1 x
+elementwise_binary_broadcasting.h 77 0x1e3a x
+elementwise_binary_broadcasting.h 78 0x1e44
+elementwise_binary_broadcasting.h 78 0x1e54
+elementwise_binary_broadcasting.h 78 0x1e58 x
+elementwise_binary_broadcasting.h 78 0x1e5e
+elementwise_binary_broadcasting.h 79 0x1e62 x
+elementwise_binary_broadcasting.h 89 0x1e70 x
+elementwise_binary_broadcasting.h 96 0x1e70 1 x
+elementwise_binary_broadcasting.h 102 0x1e70 2
+elementwise_binary_broadcasting.h 102 0x1e76 x
+elementwise_binary_broadcasting.h 117 0x1e76 1
+elementwise_binary_broadcasting.h 102 0x1e88
+elementwise_binary_broadcasting.h 102 0x1e88 1
+elementwise_binary_broadcasting.h 96 0x1e8e
+elementwise_binary_broadcasting.h 96 0x1e92 x
+elementwise_binary_broadcasting.h 103 0x1e9c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1eb0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1eb6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 106 0x1ec0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1ed0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1ed6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1ee0
+add_accum.hpp 19 0x1ee0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 117 0x1ee0 2 x
+elementwise_binary_broadcasting.h 117 0x1ee0 3 x
+elementwise_binary_broadcasting.h 117 0x1eea
+elementwise_binary_broadcasting.h 117 0x1eea 1
+elementwise_binary_broadcasting.h 117 0x1ef4
+elementwise_binary_broadcasting.h 117 0x1efa
+elementwise_binary_broadcasting.h 117 0x1f00
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f08 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f08 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f08 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f0c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f0c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f0c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f10 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f10 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f14
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f14 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f14 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f18 x
+vector.hpp 1159 0x1f18 1
+vector.hpp 1159 0x1f18 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f18 3 x
+accum.hpp 1110 0x1f18 4
+accum.hpp 1110 0x1f18 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f18 6 x
+elementwise_binary.h 195 0x1f18 7
+elementwise_binary.h 218 0x1f18 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f1e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f1e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f1e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f1e 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f26 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f26 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f26 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f2a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f2a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f2a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f32 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f32 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f32 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f36
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f36 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f36 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f36 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f3e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f42
+vector.hpp 1159 0x1f42 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f42 2
+accum.hpp 1110 0x1f42 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f42 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f42 5 x
+elementwise_binary.h 218 0x1f42 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f50 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f50 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f50 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f54
+vector.hpp 1159 0x1f54 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f54 2
+accum.hpp 1110 0x1f54 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f54 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f54 5 x
+elementwise_binary.h 195 0x1f54 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f60 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f60 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f60 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f70
+vector.hpp 1159 0x1f70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f70 2
+accum.hpp 1110 0x1f70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f70 5 x
+elementwise_binary.h 218 0x1f70 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f82
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f82 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f82 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f82 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f8c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f8c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f8c 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f8c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f96
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f96 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f96 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 121 0x1f96 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f9e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f9e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f9e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1fa4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1fa4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1fa4 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 82 0x1fb0
+ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x
+ise_binary_attribute_broadcasting.h 90 0x1fb6
+ise_binary_attribute_broadcasting.h 90 0x1fbe x
+ise_binary_attribute_broadcasting.h 117 0x1fbe 1
+ise_binary_attribute_broadcasting.h 92 0x1fc6 x
+ise_binary_attribute_broadcasting.h 92 0x1fc6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x1fd6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x1fd6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 117 0x1fe2 x
+ise_binary_attribute_broadcasting.h 92 0x1fe8
+ise_binary_attribute_broadcasting.h 92 0x1fee x
+ise_binary_attribute_broadcasting.h 92 0x1ff2
+ise_binary_attribute_broadcasting.h 117 0x1ff2 1
+ise_binary_attribute_broadcasting.h 117 0x1ff8
+ise_binary_attribute_broadcasting.h 118 0x2000
+ise_binary_attribute_broadcasting.h 118 0x2010 x
+ise_binary_attribute_broadcasting.h 118 0x2014
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 147 0x2030 x
+superkernels.cpp 152 0x2030 1
+superkernels.cpp 152 0x2036 x
+superkernels.cpp 147 0x203c
+superkernels.cpp 149 0x204a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2054
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 152 0x205c
+superkernels.cpp 152 0x205c 1
+superkernels.cpp 149 0x2062 x
+superkernels.cpp 149 0x2066
+superkernels.cpp 149 0x206e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x206e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x2076
+superkernels.cpp 166 0x2076 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x207c
+tile.hpp 74 0x2082
+tile.hpp 86 0x2082 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x208e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2098
+tile.hpp 74 0x209c
+tile.hpp 74 0x20a0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 159 0x20b0
+superkernels.cpp 159 0x20b6 x
+superkernels.cpp 159 0x20b6 1
+superkernels.cpp 157 0x20c0
+superkernels.cpp 159 0x20c0 1
+superkernels.cpp 166 0x20c0 2
+superkernels.cpp 157 0x20ca x
+superkernels.cpp 159 0x20ca 1
+superkernels.cpp 164 0x20ca 2
+superkernels.cpp 157 0x20de
+superkernels.cpp 159 0x20e6 x
+superkernels.cpp 157 0x20ea x
+superkernels.cpp 159 0x20f0 x
+superkernels.cpp 164 0x2100
+superkernels.cpp 166 0x2100 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2110 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 163 0x2118
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2118 1
+io_buffer_main.h 218 0x2122
+io_buffer_main.h 218 0x2126
+io_buffer_main.h 235 0x212a x
+io_buffer_main.h 218 0x2138 x
+io_buffer_main.h 218 0x2138 1 x
+io_buffer_main.h 218 0x213c
+io_buffer_main.h 395 0x2140
+io_buffer_main.h 395 0x214a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x214e
+superkernels.cpp 163 0x2158 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x215c x
+io_buffer_main.h 324 0x215c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x2162 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2166 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 166 0x216c x
+superkernels.cpp 163 0x2174 x
+superkernels.cpp 163 0x2178
+superkernels.cpp 164 0x217c x
+superkernels.cpp 164 0x2180
+superkernels.cpp 168 0x2190
+superkernels.cpp 169 0x2190 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2190 2 x
+io_buffer_main.h 327 0x219a
+io_buffer_main.h 425 0x219a 1
+io_buffer_main.h 425 0x21a8 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21ac 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21b6 x
+superkernels.cpp 168 0x21ba
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21c6 x
+io_buffer_main.h 327 0x21ca
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ce x
+superkernels.cpp 168 0x21d2
+superkernels.cpp 169 0x21e2
+superkernels.cpp 169 0x21e6 x
+superkernels.cpp 171 0x21f0
+superkernels.cpp 171 0x2204 x
+superkernels.cpp 171 0x220c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 124 0x2220 x
+elementwise_unary.h 126 0x2220 1 x
+elementwise_unary.h 126 0x2230 x
+elementwise_unary.h 127 0x2234 x
+elementwise_unary.h 127 0x2244
+elementwise_unary.h 128 0x2248 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x224c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 128 0x225a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x225e x
+clip_impl.h 114 0x226e x
+clip_impl.h 114 0x2272
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 130 0x2276 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2290
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 136 0x2290 1 x
+elementwise_unary.h 142 0x2290 2
+elementwise_unary.h 154 0x2290 3 x
+elementwise_unary.h 171 0x2290 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x229c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x229c 1
+elementwise_unary.h 154 0x229c 2 x
+elementwise_unary.h 190 0x229c 3 x
+elementwise_unary.h 136 0x22a8
+elementwise_unary.h 136 0x22ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 103 0x22b0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22b4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22b8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x22b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 104 0x22b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22cc 2 x
+elementwise_unary.h 171 0x22cc 3 x
+elementwise_unary.h 154 0x22d6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22de x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22e2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22e2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x22f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22f0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x22f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2300 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2310 x
+vector.hpp 1159 0x2310 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2310 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x2310 3 x
+elementwise_unary.h 176 0x2310 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2320
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2320 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x2320 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2330 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2330 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x2330 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2340 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2350 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2350 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x2350 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2358 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x235c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x235c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x235c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2364 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 158 0x2364 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x236a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x236a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x236a 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2372 x
+max_min.hpp 21 0x2376 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x237a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x237e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 176 0x2390 x
+superkernels.cpp 181 0x2390 1
+superkernels.cpp 181 0x2396 x
+superkernels.cpp 176 0x239c
+superkernels.cpp 178 0x23aa
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x23b4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 181 0x23bc
+superkernels.cpp 181 0x23bc 1
+superkernels.cpp 178 0x23c2 x
+superkernels.cpp 178 0x23c6
+superkernels.cpp 178 0x23ce
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x23ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23d6
+superkernels.cpp 195 0x23d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23dc
+tile.hpp 74 0x23e2
+tile.hpp 86 0x23e2 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23ee x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23f8
+tile.hpp 74 0x23fc
+tile.hpp 74 0x2400 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 188 0x2410
+superkernels.cpp 188 0x2416 x
+superkernels.cpp 188 0x2416 1
+superkernels.cpp 186 0x2420
+superkernels.cpp 188 0x2420 1
+superkernels.cpp 195 0x2420 2
+superkernels.cpp 186 0x242a x
+superkernels.cpp 188 0x242a 1
+superkernels.cpp 193 0x242a 2
+superkernels.cpp 186 0x243e
+superkernels.cpp 188 0x2446 x
+superkernels.cpp 186 0x244a x
+superkernels.cpp 188 0x2450 x
+superkernels.cpp 193 0x2460
+superkernels.cpp 195 0x2460 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2470 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 192 0x2478
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2478 1
+io_buffer_main.h 218 0x2482
+io_buffer_main.h 218 0x2486
+io_buffer_main.h 235 0x248a x
+io_buffer_main.h 218 0x2498 x
+io_buffer_main.h 218 0x2498 1 x
+io_buffer_main.h 218 0x249c
+io_buffer_main.h 395 0x24a0
+io_buffer_main.h 395 0x24aa x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24ae
+superkernels.cpp 192 0x24b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24bc x
+io_buffer_main.h 324 0x24bc 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24c2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24c6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 195 0x24cc x
+superkernels.cpp 192 0x24d4 x
+superkernels.cpp 192 0x24d8
+superkernels.cpp 193 0x24dc x
+superkernels.cpp 193 0x24e0
+superkernels.cpp 197 0x24f0
+superkernels.cpp 198 0x24f0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x24f0 2 x
+io_buffer_main.h 327 0x24fa
+io_buffer_main.h 425 0x24fa 1
+io_buffer_main.h 425 0x2508 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x250c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x250c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x2516 x
+superkernels.cpp 197 0x251a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2526 x
+io_buffer_main.h 327 0x252a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x252e x
+superkernels.cpp 197 0x2532
+superkernels.cpp 198 0x2542
+superkernels.cpp 198 0x2546 x
+superkernels.cpp 200 0x2550
+superkernels.cpp 200 0x2564 x
+superkernels.cpp 200 0x256c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 219 0x2600
+elementwise_binary_shared.h 219 0x2600 1 x
+elementwise_binary_shared.h 220 0x260a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2614
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2620
+elementwise_binary_shared.h 222 0x2632 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x263c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2640
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2640 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x2870
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 237 0x2870 1 x
+elementwise_binary_shared.h 244 0x2870 2
+elementwise_binary_shared.h 245 0x2870 3
+elementwise_binary_shared.h 247 0x2870 4
+elementwise_binary_shared.h 250 0x2870 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x287a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 244 0x287a 1 x
+elementwise_binary_shared.h 245 0x287a 2
+elementwise_binary_shared.h 247 0x287a 3
+elementwise_binary_shared.h 244 0x288c
+elementwise_binary_shared.h 244 0x288c 1
+elementwise_binary_shared.h 237 0x2892
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x28a0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x28a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 247 0x28a6 x
+elementwise_binary_shared.h 245 0x28d0 x
+elementwise_binary_shared.h 245 0x28d6
+elementwise_binary_shared.h 245 0x28d6 1
+elementwise_binary_shared.h 250 0x28f0
+elementwise_binary_shared.h 250 0x28f4 x
+elementwise_binary_shared.h 250 0x28f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 205 0x2910 x
+superkernels.cpp 210 0x2910 1
+superkernels.cpp 210 0x2916 x
+superkernels.cpp 205 0x291c
+superkernels.cpp 207 0x292a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2934
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 210 0x293c
+superkernels.cpp 210 0x293c 1
+superkernels.cpp 207 0x2942 x
+superkernels.cpp 207 0x2946
+superkernels.cpp 207 0x294e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x294e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x2956
+superkernels.cpp 224 0x2956 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x295c
+tile.hpp 74 0x2962
+tile.hpp 86 0x2962 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x296e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2978
+tile.hpp 74 0x297c
+tile.hpp 74 0x2980 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 217 0x2990
+superkernels.cpp 217 0x2996 x
+superkernels.cpp 217 0x2996 1
+superkernels.cpp 215 0x29a0
+superkernels.cpp 217 0x29a0 1
+superkernels.cpp 224 0x29a0 2
+superkernels.cpp 215 0x29aa x
+superkernels.cpp 217 0x29aa 1
+superkernels.cpp 222 0x29aa 2
+superkernels.cpp 215 0x29be
+superkernels.cpp 217 0x29c6 x
+superkernels.cpp 215 0x29ca x
+superkernels.cpp 217 0x29d0 x
+superkernels.cpp 222 0x29e0
+superkernels.cpp 224 0x29e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 221 0x29f8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f8 1
+io_buffer_main.h 218 0x2a02
+io_buffer_main.h 218 0x2a06
+io_buffer_main.h 235 0x2a0a x
+io_buffer_main.h 218 0x2a18 x
+io_buffer_main.h 218 0x2a18 1 x
+io_buffer_main.h 218 0x2a1c
+io_buffer_main.h 395 0x2a20
+io_buffer_main.h 395 0x2a2a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a2e
+superkernels.cpp 221 0x2a38 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a3c x
+io_buffer_main.h 324 0x2a3c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a42 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a46 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 224 0x2a4c x
+superkernels.cpp 221 0x2a54 x
+superkernels.cpp 221 0x2a58
+superkernels.cpp 222 0x2a5c x
+superkernels.cpp 222 0x2a60
+superkernels.cpp 226 0x2a70
+superkernels.cpp 227 0x2a70 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2a70 2 x
+io_buffer_main.h 327 0x2a7a
+io_buffer_main.h 425 0x2a7a 1
+io_buffer_main.h 425 0x2a88 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a8c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2a8c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a96 x
+superkernels.cpp 226 0x2a9a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2aa6 x
+io_buffer_main.h 327 0x2aaa
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2aae x
+superkernels.cpp 226 0x2ab2
+superkernels.cpp 227 0x2ac2
+superkernels.cpp 227 0x2ac6 x
+superkernels.cpp 229 0x2ad0
+superkernels.cpp 229 0x2ae4 x
+superkernels.cpp 229 0x2aec
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x2b00 x
+elementwise_binary.h 142 0x2b00 1
+elementwise_binary.h 144 0x2b00 2 x
+elementwise_binary.h 141 0x2b06
+elementwise_binary.h 141 0x2b0a
+elementwise_binary.h 142 0x2b0e x
+elementwise_binary.h 142 0x2b12
+elementwise_binary.h 130 0x2b20 x
+elementwise_binary.h 133 0x2b20 1 x
+elementwise_binary.h 130 0x2b24
+elementwise_binary.h 133 0x2b36 x
+elementwise_binary.h 134 0x2b3a x
+elementwise_binary.h 134 0x2b4a
+elementwise_binary.h 135 0x2b4e x
+elementwise_binary.h 135 0x2b5e
+elementwise_binary.h 136 0x2b62 x
+elementwise_binary.h 137 0x2b6a x
+elementwise_binary.h 136 0x2b78 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2b7c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2b80
+elementwise_binary.h 139 0x2b92 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2b9c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2ba0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2ba0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 149 0x2bb0 x
+elementwise_binary.h 156 0x2bb0 1
+elementwise_binary.h 168 0x2bb0 2 x
+elementwise_binary.h 156 0x2bba x
+elementwise_binary.h 168 0x2bba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2bc4
+mul_acc32_fp.hpp 36 0x2bc4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 156 0x2bc4 2
+elementwise_binary.h 156 0x2bc4 3
+elementwise_binary.h 156 0x2bce
+elementwise_binary.h 156 0x2bce 1
+elementwise_binary.h 156 0x2bd8
+elementwise_binary.h 156 0x2be2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2be6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 168 0x2be6 1
+elementwise_binary.h 187 0x2be6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2bec
+vector.hpp 1139 0x2bec 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2bec 2 x
+elementwise_binary.h 211 0x2bec 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2bf2 x
+vector.hpp 1139 0x2bf2 1 x
+vector.hpp 1159 0x2bf2 2
+vector.hpp 1159 0x2bf2 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2bf2 4
+accum.hpp 1110 0x2bf2 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2bf2 6 x
+elementwise_binary.h 195 0x2bf2 7
+elementwise_binary.h 213 0x2bf2 8 x
+elementwise_binary.h 218 0x2bf2 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2bfa
+vector.hpp 1139 0x2bfa 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2bfa 2 x
+elementwise_binary.h 211 0x2bfa 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c00 x
+vector.hpp 1139 0x2c00 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c00 2 x
+elementwise_binary.h 213 0x2c00 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c06
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2c06 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c0a x
+vector.hpp 1139 0x2c0a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c0a 2 x
+elementwise_binary.h 213 0x2c0a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c10
+vector.hpp 1139 0x2c10 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c10 2 x
+elementwise_binary.h 189 0x2c10 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c16 x
+vector.hpp 1139 0x2c16 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c16 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c16 3 x
+elementwise_binary.h 213 0x2c16 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c20
+vector.hpp 1139 0x2c20 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c20 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c20 3 x
+elementwise_binary.h 189 0x2c20 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c2a x
+vector.hpp 1139 0x2c2a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c2a 3 x
+elementwise_binary.h 213 0x2c2a 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c34
+vector.hpp 1139 0x2c34 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c34 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c34 3 x
+elementwise_binary.h 189 0x2c34 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c3e x
+vector.hpp 1139 0x2c3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c3e 3 x
+elementwise_binary.h 213 0x2c3e 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c48
+vector.hpp 1139 0x2c48 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c48 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c48 3 x
+elementwise_binary.h 189 0x2c48 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c52 x
+vector.hpp 1139 0x2c52 1 x
+vector.hpp 1159 0x2c52 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c52 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c52 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c52 5 x
+elementwise_binary.h 213 0x2c52 6 x
+elementwise_binary.h 218 0x2c52 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c60
+vector.hpp 1139 0x2c60 1
+vector.hpp 1159 0x2c60 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c60 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c60 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2c60 5 x
+elementwise_binary.h 189 0x2c60 6 x
+elementwise_binary.h 195 0x2c60 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2c70 x
+vector.hpp 1139 0x2c70 1 x
+vector.hpp 1159 0x2c70 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2c70 5 x
+elementwise_binary.h 213 0x2c70 6 x
+elementwise_binary.h 218 0x2c70 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2c80
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c80 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c80 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2c80 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2c88 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c88 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c88 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2c88 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2c90
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c90 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c90 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2c90 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2c98 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2c98 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2c98 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2c98 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2ca0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2ca0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2ca0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2ca0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2ca8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2ca8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2ca8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2ca8 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cb0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cb0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2cb0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2cb0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cb8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cb8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2cb8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cbc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cbc 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 172 0x2cbc 2 x
+elementwise_binary.h 195 0x2cbc 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cc2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cc2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2cc2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cc6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cc6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2cc6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cca x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2cca 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2cce
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cce 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2cce 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 369 0x2ce0 x
+superkernels.cpp 374 0x2ce0 1
+superkernels.cpp 374 0x2ce6 x
+superkernels.cpp 369 0x2cec
+superkernels.cpp 371 0x2cf2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2cf2 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 374 0x2d0e x
+superkernels.cpp 374 0x2d0e 1 x
+superkernels.cpp 371 0x2d14 x
+superkernels.cpp 371 0x2d18
+superkernels.cpp 371 0x2d1e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2d26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2d2a
+superkernels.cpp 379 0x2d2a 1
+superkernels.cpp 381 0x2d2a 2
+superkernels.cpp 393 0x2d2a 3
+superkernels.cpp 377 0x2d34
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2d34 1
+tile.hpp 74 0x2d3e
+tile.hpp 86 0x2d3e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2d4a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2d54
+tile.hpp 74 0x2d58
+tile.hpp 74 0x2d5c x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 381 0x2d60
+superkernels.cpp 381 0x2d60 1 x
+superkernels.cpp 381 0x2d6a
+superkernels.cpp 381 0x2d6a 1
+superkernels.cpp 390 0x2d6a 2
+superkernels.cpp 379 0x2d74 x
+superkernels.cpp 382 0x2d74 1
+superkernels.cpp 391 0x2d74 2
+superkernels.cpp 379 0x2d8a
+superkernels.cpp 381 0x2d90 x
+superkernels.cpp 379 0x2d94 x
+superkernels.cpp 381 0x2d98 x
+superkernels.cpp 382 0x2d9c x
+superkernels.cpp 390 0x2da0
+superkernels.cpp 391 0x2da6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2db0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2db4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2db4 1
+io_buffer_main.h 218 0x2dbe
+io_buffer_main.h 218 0x2dc2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2dc6 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 235 0x2dca x
+io_buffer_main.h 218 0x2dd6 x
+io_buffer_main.h 218 0x2dd6 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2dda x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2dda 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2de0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 395 0x2de4
+io_buffer_main.h 395 0x2de4 1
+io_buffer_main.h 395 0x2dee x
+io_buffer_main.h 218 0x2df2 x
+io_buffer_main.h 218 0x2dfa
+io_buffer_main.h 218 0x2dfe
+io_buffer_main.h 218 0x2e02
+io_buffer_main.h 235 0x2e06 x
+io_buffer_main.h 218 0x2e14 x
+io_buffer_main.h 218 0x2e14 1 x
+io_buffer_main.h 218 0x2e18
+io_buffer_main.h 395 0x2e24 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2e28
+superkernels.cpp 391 0x2e28 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2e28 2
+io_buffer_main.h 125 0x2e36
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2e3a x
+superkernels.cpp 391 0x2e40 x
+superkernels.cpp 393 0x2e40 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2e46 x
+io_buffer_main.h 125 0x2e4a
+io_buffer_main.h 327 0x2e4e
+io_buffer_main.h 327 0x2e4e 1
+io_buffer_main.h 125 0x2e54
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 393 0x2e5a x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2e60
+io_buffer_main.h 327 0x2e60 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2e64 x
+superkernels.cpp 391 0x2e68 x
+superkernels.cpp 391 0x2e6c
+superkernels.cpp 390 0x2e70 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2e80 x
+io_buffer_main.h 327 0x2e80 1
+io_buffer_main.h 327 0x2e80 2
+io_buffer_main.h 327 0x2e80 3
+io_buffer_main.h 327 0x2e80 4
+io_buffer_main.h 425 0x2e80 5
+io_buffer_main.h 425 0x2e80 6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2e8a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 425 0x2e9a x
+io_buffer_main.h 327 0x2e9e x
+io_buffer_main.h 324 0x2ea2
+io_buffer_main.h 327 0x2eb0
+io_buffer_main.h 324 0x2eb4 x
+io_buffer_main.h 327 0x2eb4 1
+io_buffer_main.h 425 0x2ec6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2eca
+superkernels.cpp 398 0x2eca 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2eca 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2ed4 x
+superkernels.cpp 397 0x2ed8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2ee4 x
+io_buffer_main.h 327 0x2ee8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2eec x
+superkernels.cpp 397 0x2ef0
+superkernels.cpp 398 0x2f00
+superkernels.cpp 398 0x2f04 x
+superkernels.cpp 400 0x2f10
+superkernels.cpp 400 0x2f26 x
+superkernels.cpp 400 0x2f2e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h:
+conv2d_dw_bf16_params.h 211 0x2f40 x
+conv2d_dw_bf16_params.h 215 0x2f40 1
+conv2d_dw_bf16_params.h 215 0x2f40 2 x
+conv2d_dw_bf16_params.h 215 0x2f4a x
+conv2d_dw_bf16_params.h 218 0x2f4a 1
+conv2d_dw_bf16_params.h 218 0x2f4a 2
+conv2d_dw_bf16_params.h 211 0x2f54
+conv2d_dw_bf16_params.h 218 0x2f5a
+conv2d_dw_bf16_params.h 215 0x2f6e
+conv2d_dw_bf16_params.h 215 0x2f72
+conv2d_dw_bf16_params.h 215 0x2f76
+conv2d_dw_bf16_params.h 215 0x2f7a
+conv2d_dw_bf16_params.h 215 0x2f88
+conv2d_dw_bf16_params.h 215 0x2f8c
+conv2d_dw_bf16_params.h 218 0x2f90 x
+conv2d_dw_bf16_params.h 218 0x2f94
+conv2d_dw_bf16_params.h 218 0x2f98
+conv2d_dw_bf16_params.h 218 0x2fa4
+conv2d_dw_bf16_params.h 218 0x2faa
+conv2d_dw_bf16_params.h 218 0x2fb0
+conv2d_dw_bf16_params.h 218 0x2fb6
+conv2d_dw_bf16_params.h 218 0x2fbc
+conv2d_dw_bf16_params.h 218 0x2fc0
+conv2d_dw_bf16_params.h 218 0x2fd0
+conv2d_dw_bf16_params.h 218 0x2fd0 1
+conv2d_dw_bf16_params.h 219 0x2fd0 2
+conv2d_dw_bf16_params.h 218 0x2fd6
+conv2d_dw_bf16_params.h 219 0x2fd6 1 x
+conv2d_dw_bf16_params.h 219 0x2fdc
+conv2d_dw_bf16_params.h 219 0x2fe0
+conv2d_dw_bf16_params.h 218 0x2fea x
+conv2d_dw_bf16_params.h 218 0x2fee
+conv2d_dw_bf16_params.h 219 0x2ff2 x
+conv2d_dw_bf16_params.h 219 0x2ff8
+conv2d_dw_bf16_params.h 218 0x3002 x
+conv2d_dw_bf16_params.h 219 0x3006 x
+conv2d_dw_bf16_params.h 219 0x300a
+conv2d_dw_bf16_params.h 218 0x300e x
+conv2d_dw_bf16_params.h 218 0x3012
+conv2d_dw_bf16_params.h 219 0x3012 1 x
+conv2d_dw_bf16_params.h 219 0x3020
+conv2d_dw_bf16_params.h 226 0x3020 1
+conv2d_dw_bf16_params.h 231 0x3020 2
+conv2d_dw_bf16_params.h 219 0x302a
+conv2d_dw_bf16_params.h 219 0x302a 1
+conv2d_dw_bf16_params.h 220 0x302a 2
+conv2d_dw_bf16_params.h 220 0x302a 3
+conv2d_dw_bf16_params.h 232 0x302a 4
+conv2d_dw_bf16_params.h 234 0x302a 5
+conv2d_dw_bf16_params.h 234 0x302a 6
+conv2d_dw_bf16_params.h 243 0x302a 7
+conv2d_dw_bf16_params.h 250 0x302a 8
+conv2d_dw_bf16_params.h 253 0x302a 9
+conv2d_dw_bf16_params.h 260 0x302a 10
+conv2d_dw_bf16_params.h 264 0x302a 11
+conv2d_dw_bf16_params.h 220 0x3034
+conv2d_dw_bf16_params.h 234 0x3034 1
+conv2d_dw_bf16_params.h 246 0x3034 2
+conv2d_dw_bf16_params.h 253 0x3034 3
+conv2d_dw_bf16_params.h 226 0x303e x
+conv2d_dw_bf16_params.h 234 0x303e 1
+conv2d_dw_bf16_params.h 234 0x303e 2
+conv2d_dw_bf16_params.h 231 0x3048
+conv2d_dw_bf16_params.h 232 0x3048 1
+conv2d_dw_bf16_params.h 232 0x3048 2
+conv2d_dw_bf16_params.h 235 0x3052
+conv2d_dw_bf16_params.h 235 0x3052 1
+conv2d_dw_bf16_params.h 242 0x3052 2
+conv2d_dw_bf16_params.h 242 0x3052 3
+conv2d_dw_bf16_params.h 243 0x3052 4
+conv2d_dw_bf16_params.h 250 0x3052 5
+conv2d_dw_bf16_params.h 255 0x3052 6
+conv2d_dw_bf16_params.h 260 0x3052 7
+conv2d_dw_bf16_params.h 264 0x3052 8
+conv2d_dw_bf16_params.h 234 0x305c
+conv2d_dw_bf16_params.h 239 0x305c 1
+conv2d_dw_bf16_params.h 242 0x305c 2
+conv2d_dw_bf16_params.h 248 0x305c 3
+conv2d_dw_bf16_params.h 253 0x305c 4
+conv2d_dw_bf16_params.h 264 0x305c 5
+conv2d_dw_bf16_params.h 219 0x3066 x
+conv2d_dw_bf16_params.h 219 0x306a
+conv2d_dw_bf16_params.h 219 0x306e
+conv2d_dw_bf16_params.h 220 0x306e 1
+conv2d_dw_bf16_params.h 219 0x3074
+conv2d_dw_bf16_params.h 243 0x3074 1
+conv2d_dw_bf16_params.h 247 0x3074 2
+conv2d_dw_bf16_params.h 220 0x307a x
+conv2d_dw_bf16_params.h 250 0x307a 1
+conv2d_dw_bf16_params.h 219 0x3080 x
+conv2d_dw_bf16_params.h 220 0x3084 x
+conv2d_dw_bf16_params.h 231 0x3084 1
+conv2d_dw_bf16_params.h 219 0x308a x
+conv2d_dw_bf16_params.h 231 0x308a 1 x
+conv2d_dw_bf16_params.h 220 0x3090 x
+conv2d_dw_bf16_params.h 253 0x3090 1 x
+conv2d_dw_bf16_params.h 240 0x3096
+conv2d_dw_bf16_params.h 246 0x3096 1 x
+conv2d_dw_bf16_params.h 232 0x309c x
+conv2d_dw_bf16_params.h 226 0x30a0 x
+conv2d_dw_bf16_params.h 231 0x30a4 x
+conv2d_dw_bf16_params.h 238 0x30a4 1
+conv2d_dw_bf16_params.h 234 0x30aa x
+conv2d_dw_bf16_params.h 231 0x30ae x
+conv2d_dw_bf16_params.h 232 0x30ae 1 x
+conv2d_dw_bf16_params.h 234 0x30b4 x
+conv2d_dw_bf16_params.h 232 0x30b8 x
+conv2d_dw_bf16_params.h 227 0x30bc x
+conv2d_dw_bf16_params.h 232 0x30bc 1
+conv2d_dw_bf16_params.h 234 0x30c2 x
+conv2d_dw_bf16_params.h 235 0x30c2 1 x
+conv2d_dw_bf16_params.h 235 0x30c8
+conv2d_dw_bf16_params.h 243 0x30c8 1 x
+conv2d_dw_bf16_params.h 238 0x30ce x
+conv2d_dw_bf16_params.h 242 0x30ce 1 x
+conv2d_dw_bf16_params.h 242 0x30d4
+conv2d_dw_bf16_params.h 243 0x30d4 1 x
+conv2d_dw_bf16_params.h 239 0x30da x
+conv2d_dw_bf16_params.h 242 0x30da 1 x
+conv2d_dw_bf16_params.h 243 0x30e0 x
+conv2d_dw_bf16_params.h 250 0x30e0 1 x
+conv2d_dw_bf16_params.h 234 0x30e6 x
+conv2d_dw_bf16_params.h 240 0x30e6 1 x
+conv2d_dw_bf16_params.h 253 0x30e6 2 x
+conv2d_dw_bf16_params.h 247 0x30ec x
+conv2d_dw_bf16_params.h 242 0x30f0 x
+conv2d_dw_bf16_params.h 247 0x30f0 1
+conv2d_dw_bf16_params.h 241 0x30f6 x
+conv2d_dw_bf16_params.h 243 0x30f6 1 x
+conv2d_dw_bf16_params.h 243 0x30fc
+conv2d_dw_bf16_params.h 245 0x30fc 1 x
+conv2d_dw_bf16_params.h 243 0x3102 x
+conv2d_dw_bf16_params.h 248 0x3102 1 x
+conv2d_dw_bf16_params.h 245 0x3108 x
+conv2d_dw_bf16_params.h 250 0x3108 1 x
+conv2d_dw_bf16_params.h 246 0x310e x
+conv2d_dw_bf16_params.h 250 0x310e 1
+conv2d_dw_bf16_params.h 247 0x3114 x
+conv2d_dw_bf16_params.h 248 0x3114 1 x
+conv2d_dw_bf16_params.h 250 0x311a x
+conv2d_dw_bf16_params.h 250 0x311a 1 x
+conv2d_dw_bf16_params.h 248 0x3120 x
+conv2d_dw_bf16_params.h 250 0x3120 1
+conv2d_dw_bf16_params.h 249 0x3126 x
+conv2d_dw_bf16_params.h 255 0x3126 1 x
+conv2d_dw_bf16_params.h 258 0x3126 2
+conv2d_dw_bf16_params.h 258 0x3126 3
+conv2d_dw_bf16_params.h 252 0x3130 x
+conv2d_dw_bf16_params.h 253 0x3130 1 x
+conv2d_dw_bf16_params.h 253 0x3136
+conv2d_dw_bf16_params.h 255 0x3136 1 x
+conv2d_dw_bf16_params.h 254 0x313c x
+conv2d_dw_bf16_params.h 255 0x313c 1
+conv2d_dw_bf16_params.h 256 0x313c 2
+conv2d_dw_bf16_params.h 258 0x313c 3 x
+conv2d_dw_bf16_params.h 258 0x313c 4 x
+conv2d_dw_bf16_params.h 259 0x313c 5
+conv2d_dw_bf16_params.h 263 0x313c 6
+conv2d_dw_bf16_params.h 255 0x3148 x
+conv2d_dw_bf16_params.h 256 0x314c x
+conv2d_dw_bf16_params.h 260 0x314c 1 x
+conv2d_dw_bf16_params.h 258 0x3152 x
+conv2d_dw_bf16_params.h 260 0x3152 1
+conv2d_dw_bf16_params.h 259 0x3158 x
+conv2d_dw_bf16_params.h 264 0x3158 1 x
+conv2d_dw_bf16_params.h 260 0x315e x
+conv2d_dw_bf16_params.h 264 0x315e 1
+conv2d_dw_bf16_params.h 262 0x3164 x
+conv2d_dw_bf16_params.h 263 0x3168 x
+conv2d_dw_bf16_params.h 264 0x316c x
+conv2d_dw_bf16_params.h 266 0x3170 x
+conv2d_dw_bf16_params.h 266 0x3180
+conv2d_dw_bf16_params.h 266 0x3180 1
+conv2d_dw_bf16_params.h 266 0x3186
+conv2d_dw_bf16_params.h 266 0x318a
+conv2d_dw_bf16_params.h 266 0x3196
+conv2d_dw_bf16_params.h 266 0x31a0
+conv2d_dw_bf16_params.h 267 0x31a0 1
+conv2d_dw_bf16_params.h 266 0x31aa
+conv2d_dw_bf16_params.h 266 0x31aa 1
+conv2d_dw_bf16_params.h 266 0x31b0
+conv2d_dw_bf16_params.h 266 0x31b6
+conv2d_dw_bf16_params.h 267 0x31bc x
+conv2d_dw_bf16_params.h 266 0x31c6 x
+conv2d_dw_bf16_params.h 266 0x31ca
+conv2d_dw_bf16_params.h 267 0x31ca 1 x
+conv2d_dw_bf16_params.h 266 0x31d0 x
+conv2d_dw_bf16_params.h 266 0x31d8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 444 0x34c0 x
+superkernels.cpp 449 0x34c0 1
+superkernels.cpp 449 0x34c6 x
+superkernels.cpp 444 0x34cc
+superkernels.cpp 467 0x34da
+superkernels.cpp 452 0x34ea
+superkernels.cpp 449 0x34f2
+superkernels.cpp 449 0x34f2 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x34f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 446 0x34fc x
+superkernels.cpp 446 0x3500
+superkernels.cpp 446 0x3504
+superkernels.cpp 446 0x350a
+superkernels.cpp 461 0x350e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x350e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3518
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x3518 1
+tile.hpp 86 0x3518 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3526 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x3530
+tile.hpp 74 0x3534
+tile.hpp 74 0x3538 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 452 0x3540
+superkernels.cpp 461 0x3540 1
+superkernels.cpp 452 0x3548 x
+superkernels.cpp 453 0x354c
+superkernels.cpp 453 0x354c 1 x
+superkernels.cpp 452 0x355e
+superkernels.cpp 457 0x355e 1
+superkernels.cpp 452 0x3568 x
+superkernels.cpp 453 0x356c x
+superkernels.cpp 457 0x3570
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3580 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x3584
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3584 1
+io_buffer_main.h 218 0x358e
+io_buffer_main.h 218 0x3592
+io_buffer_main.h 235 0x3596 x
+io_buffer_main.h 218 0x35a4 x
+io_buffer_main.h 218 0x35a4 1 x
+io_buffer_main.h 218 0x35a8
+io_buffer_main.h 395 0x35ac
+io_buffer_main.h 395 0x35b6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 456 0x35ba
+superkernels.cpp 459 0x35ba 1
+superkernels.cpp 464 0x35ba 2
+superkernels.cpp 465 0x35ba 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x35ba 4
+io_buffer_main.h 425 0x35ba 5
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 52 0x35c4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x35ce
+io_buffer_main.h 324 0x35ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x35d4 x
+superkernels.cpp 457 0x35d8
+superkernels.cpp 461 0x35d8 1
+superkernels.cpp 456 0x35e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x35ec x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x35f2 x
+superkernels.cpp 456 0x35f6 x
+superkernels.cpp 459 0x35fa x
+superkernels.cpp 461 0x35fe x
+superkernels.cpp 456 0x3604 x
+superkernels.cpp 459 0x3608 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 201 0x360c x
+io_buffer_impl.h 52 0x3610 x
+io_buffer_impl.h 52 0x3614
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3620
+io_buffer_main.h 324 0x3624 x
+io_buffer_main.h 425 0x3634 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3638
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3638 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3642 x
+superkernels.cpp 464 0x3646
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3656 x
+io_buffer_main.h 327 0x365a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x365e x
+superkernels.cpp 464 0x3662
+superkernels.cpp 465 0x3668
+superkernels.cpp 465 0x3674 x
+superkernels.cpp 467 0x3680
+superkernels.cpp 467 0x368a x
+superkernels.cpp 467 0x368e
+superkernels.cpp - 0x368f
+
+
+superkernels.cpp:
+File name Line number Starting address View Stmt
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 20 0x36a0 x
+0_0_reloadable3.cc 22 0x36a0 1
+0_0_reloadable3.cc 22 0x36a4 x
+0_0_reloadable3.cc 23 0x36a8 x
+0_0_reloadable3.cc 25 0x36ac x
+0_0_reloadable3.cc 24 0x36b0 x
+0_0_reloadable3.cc 21 0x36b4 x
+0_0_reloadable3.cc 29 0x36d0 x
+0_0_reloadable3.cc 31 0x36d0 1
+0_0_reloadable3.cc 31 0x36d4 x
+0_0_reloadable3.cc 33 0x36d8 x
+0_0_reloadable3.cc 32 0x36dc x
+0_0_reloadable3.cc 30 0x36e0 x
+0_0_reloadable3.cc 37 0x36f0 x
+0_0_reloadable3.cc 39 0x36f0 1
+0_0_reloadable3.cc 39 0x36f4 x
+0_0_reloadable3.cc 41 0x36f8 x
+0_0_reloadable3.cc 40 0x36fc x
+0_0_reloadable3.cc 38 0x3700 x
+0_0_reloadable3.cc 45 0x3710 x
+0_0_reloadable3.cc 47 0x3710 1
+0_0_reloadable3.cc 47 0x3714 x
+0_0_reloadable3.cc 49 0x3718 x
+0_0_reloadable3.cc 48 0x371c x
+0_0_reloadable3.cc 46 0x3720 x
+0_0_reloadable3.cc 53 0x3730 x
+0_0_reloadable3.cc 55 0x3730 1
+0_0_reloadable3.cc 55 0x3734 x
+0_0_reloadable3.cc 56 0x3738 x
+0_0_reloadable3.cc 58 0x373c x
+0_0_reloadable3.cc 57 0x3740 x
+0_0_reloadable3.cc 54 0x3744 x
+0_0_reloadable3.cc 62 0x3760 x
+0_0_reloadable3.cc 64 0x3760 1
+0_0_reloadable3.cc 64 0x3764 x
+0_0_reloadable3.cc 65 0x3768 x
+0_0_reloadable3.cc 67 0x376c x
+0_0_reloadable3.cc 66 0x3770 x
+0_0_reloadable3.cc 63 0x3774 x
+0_0_reloadable3.cc 82 0x930 x
+0_0_reloadable3.cc 84 0x930 1 x
+0_0_reloadable3.cc 84 0x930 2
+0_0_reloadable3.cc 86 0x930 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x930 4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 82 0x936
+0_0_reloadable3.cc 84 0x944
+0_0_reloadable3.cc 86 0x944 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x944 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 84 0x94c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x952
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x958 x
+io_buffer_compiler.h 590 0x95c
+io_buffer_compiler.h 590 0x960
+io_buffer_compiler.h 590 0x964
+io_buffer_compiler.h 590 0x968
+io_buffer_compiler.h 195 0x978 x
+io_buffer_compiler.h 195 0x978 1 x
+io_buffer_compiler.h 194 0x97c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x980
+io_buffer_main.h 410 0x980 1
+io_buffer_main.h 410 0x98a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 86 0x98e
+0_0_reloadable3.cc 90 0x98e 1
+0_0_reloadable3.cc 86 0x992 x
+0_0_reloadable3.cc 86 0x996
+0_0_reloadable3.cc 86 0x99a
+0_0_reloadable3.cc 86 0x9a8
+0_0_reloadable3.cc 86 0x9ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x9b0 x
+io_buffer_compiler.h 590 0x9b8
+io_buffer_compiler.h 590 0x9bc
+io_buffer_compiler.h 590 0x9c0
+io_buffer_compiler.h 590 0x9c4
+io_buffer_compiler.h 195 0x9d4 x
+io_buffer_compiler.h 195 0x9d4 1 x
+io_buffer_compiler.h 194 0x9d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x9e4 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 90 0x9e8 x
+0_0_reloadable3.cc 90 0x9ec
+0_0_reloadable3.cc 90 0x9f0
+0_0_reloadable3.cc 90 0x9f6
+0_0_reloadable3.cc 90 0xa08
+0_0_reloadable3.cc 93 0xa0c
+0_0_reloadable3.cc 95 0xa0c 1
+0_0_reloadable3.cc 93 0xa20 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa20 1
+io_buffer_compiler.h 606 0xa20 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa20 3
+io_buffer_main.h 440 0xa20 4
+io_buffer_main.h 440 0xa26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 95 0xa2a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa2e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa2e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 605 0xa38 x
+io_buffer_compiler.h 605 0xa3c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa4a
+io_buffer_main.h 440 0xa4e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa52
+io_buffer_compiler.h 606 0xa52 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 95 0xa58 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa66 x
+io_buffer_compiler.h 605 0xa6a x
+io_buffer_compiler.h 606 0xa6a 1
+io_buffer_compiler.h 605 0xa70
+io_buffer_compiler.h 606 0xa70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa82 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 98 0xa86
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa8a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc:
+0_0_reloadable3.cc 98 0xa96 x
+0_0_reloadable3.cc 98 0xaa0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xaa4
+io_buffer_compiler.h 606 0xaa8 x
+io_buffer_compiler.h 606 0xaac
+io_buffer_compiler.h 606 0xab0
+io_buffer_compiler.h - 0xab1
+
+
+CU: me_div.c:
+File name Line number Starting address View Stmt
+
+./me_div.c:[++]
+me_div.c 108 0x3790
+me_div.c 108 0x3790 1
+me_div.c 115 0x3790 2 x
+me_div.c 108 0x3796
+me_div.c 108 0x379a
+me_div.c 108 0x379e
+me_div.c 108 0x37a2
+me_div.c 108 0x37a6
+me_div.c 108 0x37aa
+me_div.c 108 0x37ae
+me_div.c 108 0x37b2
+me_div.c 108 0x37b6
+me_div.c 108 0x37ba
+me_div.c 108 0x37be
+me_div.c 108 0x37c2
+me_div.c 108 0x37c6
+me_div.c 108 0x37ca
+me_div.c 108 0x37ce
+me_div.c 108 0x37d2
+me_div.c 108 0x37d6
+me_div.c 108 0x37da
+me_div.c 108 0x37de
+me_div.c 108 0x37e2
+me_div.c 108 0x37e6
+me_div.c 108 0x37ea
+me_div.c 108 0x37ee
+me_div.c 108 0x37f2
+me_div.c 108 0x37f6
+me_div.c 108 0x37fa
+me_div.c 108 0x37fe
+me_div.c 108 0x3802
+me_div.c 119 0x3806 x
+me_div.c 108 0x380a x
+me_div.c 108 0x380e
+me_div.c 108 0x3812
+me_div.c 108 0x3816
+me_div.c - 0x3817
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.bcf
@@ -0,0 +1,16 @@
+_reserved DMb 0x0 0x40000
+
+_reserved PM 0x0 0x930 //reserved for main elf
+
+_entry_point _Z13kernelWrapperPPvjjjj
+_symbol _Z13kernelWrapperPPvjjjj 0x930
+
+_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers
+_reserved DMb 0x7ba80 0x40 //reserved for sync buffer
+_stack DM_stack 0x7bac0 0x940 //stack for core
+_reserved DMb 0x7c400 0x40 //reserved for main elf heap
+//space for synopsys compiler at 0x7c440 0x880//heap
+_reserved DMb 0x40000 0x3b280
+
+_reserved DMb 0x7ccc0 0x3340
+_reserved DMb 0x80000 0x80000 // And everything else the core can't see
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.prx
new file mode 100644
index 0000000000000000000000000000000000000000..01290dc45d0eb94c4eae769f9ff170b67c6f935a
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/scripts/3_3_reloadable13.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/src/3_3_reloadable13.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/src/3_3_reloadable13.cc
new file mode 100644
index 0000000000000000000000000000000000000000..49ee750344fb3fb512b745064140a08205682319
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable13/src/3_3_reloadable13.cc
@@ -0,0 +1,98 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+
+// Declare Kernel objects and external arrays
+
+
+void _b896_wrapper(void* args[])
+{
+ conv2d_maxpool(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[2]));
+}
+
+void _b901_wrapper(void* args[])
+{
+ superkernel_add1d_attribute_broadcasting(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b906_wrapper(void* args[])
+{
+ superkernel_clip1d(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b881_wrapper(void* args[])
+{
+ superkernel_mul1d_attribute_broadcasting(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+void _b891_wrapper(void* args[])
+{
+ superkernel_mul1d(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast*>(args[2]));
+}
+
+void _b919_wrapper(void* args[])
+{
+ superkernel_conv2d_dwc(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast*>(args[1]),
+ *reinterpret_cast(args[3]),
+ *reinterpret_cast*>(args[2]));
+}
+
+using UniformKernelFunc = void (*)(void **);
+
+static UniformKernelFunc g_uniformKernelFuncs[6] = {
+ _b896_wrapper,
+ _b901_wrapper,
+ _b906_wrapper,
+ _b881_wrapper,
+ _b891_wrapper,
+ _b919_wrapper
+};
+
+__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut)
+{
+ uint32 idx = 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 1);
+ idx += (numSyncIn > 1) ? 1 : 0;
+ idx += numAsyncIn;
+
+ (*(g_uniformKernelFuncs[kernelId]))(args);
+
+ idx = 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 1);
+ idx += (numSyncIn > 1) ? 1 : 0;
+ idx += numAsyncIn;
+}
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.calltree
new file mode 100644
index 0000000000000000000000000000000000000000..0d87486df8d685214c85a56d2c420e80fd5d49bc
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.calltree
@@ -0,0 +1,54 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:39 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+
+// Release: ipp V-2024.06-TGT-241219
+
+_Z13kernelWrapperPPvjjjj
+ _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _ZN12me_primitive10udiv_dstepEjjRjS0_ (*)
+ int32_to_float32
+ _ZL28normalizeRoundAndPackFloat32iij
+ _ZL19roundAndPackFloat32iij
+ float32_add
+ _ZL14addFloat32Sigsjji
+ _ZL19propagateFloat32NaNjj
+ _ZL19roundAndPackFloat32iij (*)
+ _ZL14subFloat32Sigsjji
+ _ZL19propagateFloat32NaNjj (*)
+ _ZL28normalizeRoundAndPackFloat32iij (*)
+
+
+Call tree stack and functions sizes:
+
+stack stack stack call func func function name
+ desc level level desc
+----- ----- ----- ----- ----- ----- --------------------------------------------------------------
+ 64 448 0 0 220 10058 _Z13kernelWrapperPPvjjjj
+ 128 384 1 1 2676 9838 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 2 1588 1588 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 0 0 2 2 670 670 _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ 256 256 2 2 2680 2822 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 0 0 3 3 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 2 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 2 114 530 int32_to_float32
+ 0 0 2 3 24 416 _ZL28normalizeRoundAndPackFloat32iij
+ 0 0 2 4 392 392 _ZL19roundAndPackFloat32iij
+ 0 0 2 2 64 1968 float32_add
+ 0 0 3 3 624 1128 _ZL14addFloat32Sigsjji
+ 0 0 4 4 112 112 _ZL19propagateFloat32NaNjj
+ 0 0 3 4 392 392 _ZL19roundAndPackFloat32iij
+ 0 0 2 3 752 1280 _ZL14subFloat32Sigsjji
+ 0 0 3 4 112 112 _ZL19propagateFloat32NaNjj
+ 0 0 2 4 24 416 _ZL28normalizeRoundAndPackFloat32iij (*)
+
+
+Maximum call level : 4
+Maximum stack level: 4
+Maximum stack size : 448
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmic2
new file mode 100644
index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmic2
@@ -0,0 +1,14427 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable2.cc" 29 first
+.src_ref 0 "0_0_reloadable2.cc" 31 60 first
+.function_start
+ 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00101111" // /* MW 4 */
+ 2355 "11010000" // /* MW 3 */
+ 2356 "11000010" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 29
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 31 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "01010000" // /* MW 6 */
+ 2367 "11101000" // /* MW 5 */
+ 2368 "00000001" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "01110011" // /* MW 2 */
+ 2371 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79
+.src_ref 0 "0_0_reloadable2.cc" 31 110 first
+ 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2373 "01111001" // /* MW 9 */
+ 2374 "01100000" // /* MW 8 */
+ 2375 "10110000" // /* MW 7 */
+ 2376 "10000011" // /* MW 6 */
+ 2377 "10100111" // /* MW 5 */
+ 2378 "00011111" // /* MW 4 */
+ 2379 "10110000" // /* MW 3 */
+ 2380 "10000010" // /* MW 2 */
+ 2381 "11111111" // /* MW 1 */
+ 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2383 "00111101" // /* MW 3 */
+ 2384 "11110100" // /* MW 2 */
+ 2385 "00001111" // /* MW 1 */
+ 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2387 "00000000" // /* MW 1 */
+ 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2389 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2391 "00000010" // /* MW 3 */
+ 2392 "01101000" // /* MW 2 */
+ 2393 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2395 "00010110" // /* MW 3 */
+ 2396 "00011110" // /* MW 2 */
+ 2397 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2399 "01010110" // /* MW 3 */
+ 2400 "00111110" // /* MW 2 */
+ 2401 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2403 "00110110" // /* MW 3 */
+ 2404 "11101110" // /* MW 2 */
+ 2405 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2407 "01110110" // /* MW 3 */
+ 2408 "00000111" // /* MW 2 */
+ 2409 "00000000" // /* MW 1 */
+ 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2411 "00000000" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2423 "00100010" // /* MW 3 */
+ 2424 "00100001" // /* MW 2 */
+ 2425 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2427 "00010001" // /* MW 3 */
+ 2428 "11010110" // /* MW 2 */
+ 2429 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2431 "11111101" // /* MW 3 */
+ 2432 "11100000" // /* MW 2 */
+ 2433 "00010111" // /* MW 1 */
+ 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2435 "00000000" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2441 "00001000" // /* MW 3 */
+ 2442 "01010111" // /* MW 2 */
+ 2443 "00010100" // /* MW 1 */
+ 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2445 "00000000" // /* MW 1 */
+ 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2447 "00000000" // /* MW 1 */
+ 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2449 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79 first
+ 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00011110" // /* MW 3 */
+ 2452 "00101100" // /* MW 2 */
+ 2453 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 18 47 first
+ 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "10011110" // /* MW 3 */
+ 2456 "11111100" // /* MW 2 */
+ 2457 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 19 81 first
+ 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2459 "00011110" // /* MW 3 */
+ 2460 "00000101" // /* MW 2 */
+ 2461 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 16 4 first
+.no_stack_arguments
+ 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */
+ 2463 "00000001" // /* MW 5 */
+ 2464 "00000000" // /* MW 4 */
+ 2465 "10111000" // /* MW 3 */
+ 2466 "00001110" // /* MW 2 */
+ 2467 "00000000" // /* MW 1 */
+.delay_slot
+ 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2469 "01010101" // /* MW 3 */
+ 2470 "11110011" // /* MW 2 */
+ 2471 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2479 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 38 60 first
+.return_address
+ 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2481 "00010110" // /* MW 3 */
+ 2482 "11110110" // /* MW 2 */
+ 2483 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2485 "01010001" // /* MW 3 */
+ 2486 "11110011" // /* MW 2 */
+ 2487 "00000111" // /* MW 1 */
+ 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2489 "00000000" // /* MW 1 */
+ 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2491 "00000000" // /* MW 1 */
+ 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2493 "00000000" // /* MW 1 */
+ 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2495 "00000000" // /* MW 1 */
+ 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2497 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2499 "00001000" // /* MW 3 */
+ 2500 "01101000" // /* MW 2 */
+ 2501 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2503 "00010110" // /* MW 3 */
+ 2504 "00000110" // /* MW 2 */
+ 2505 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2507 "00000101" // /* MW 3 */
+ 2508 "00100010" // /* MW 2 */
+ 2509 "00010000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+ 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2517 "00000000" // /* MW 1 */
+ 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2519 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "00011000" // /* MW 3 */
+ 2522 "00010101" // /* MW 2 */
+ 2523 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2525 "01000001" // /* MW 5 */
+ 2526 "10101111" // /* MW 4 */
+ 2527 "00101101" // /* MW 3 */
+ 2528 "10000111" // /* MW 2 */
+ 2529 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2531 "00010110" // /* MW 3 */
+ 2532 "11110110" // /* MW 2 */
+ 2533 "00000000" // /* MW 1 */
+ 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2535 "10011001" // /* MW 3 */
+ 2536 "11111011" // /* MW 2 */
+ 2537 "00000111" // /* MW 1 */
+ 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2539 "00000000" // /* MW 1 */
+ 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "11110001" // /* MW 3 */
+ 2542 "11111101" // /* MW 2 */
+ 2543 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41 first
+ 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000001" // /* MW 5 */
+ 2546 "00000000" // /* MW 4 */
+ 2547 "00000000" // /* MW 3 */
+ 2548 "11111000" // /* MW 2 */
+ 2549 "11111111" // /* MW 1 */
+ 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2551 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+ 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2553 "00000000" // /* MW 3 */
+ 2554 "00101000" // /* MW 2 */
+ 2555 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2557 "00000001" // /* MW 3 */
+ 2558 "01100011" // /* MW 2 */
+ 2559 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2561 "00010010" // /* MW 3 */
+ 2562 "00100001" // /* MW 2 */
+ 2563 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2565 "00010001" // /* MW 3 */
+ 2566 "11110110" // /* MW 2 */
+ 2567 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2571 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 218 first
+.src_ref 2 "reduce_base_c8.h" 220 27 first
+.src_ref 2 "reduce_base_c8.h" 290 63
+.src_ref 2 "reduce_base_c8.h" 348 46
+.function_start
+ 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2577 "01111000" // /* MW 11 */
+ 2578 "01100000" // /* MW 10 */
+ 2579 "00001001" // /* MW 9 */
+ 2580 "01101000" // /* MW 8 */
+ 2581 "01100111" // /* MW 7 */
+ 2582 "00111110" // /* MW 6 */
+ 2583 "10001011" // /* MW 5 */
+ 2584 "10000000" // /* MW 4 */
+ 2585 "11010011" // /* MW 3 */
+ 2586 "10001110" // /* MW 2 */
+ 2587 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 348 46 first
+ 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2589 "00001000" // /* MW 9 */
+ 2590 "00000111" // /* MW 8 */
+ 2591 "00110000" // /* MW 7 */
+ 2592 "00001001" // /* MW 6 */
+ 2593 "00100101" // /* MW 5 */
+ 2594 "00111110" // /* MW 4 */
+ 2595 "00000000" // /* MW 3 */
+ 2596 "00000111" // /* MW 2 */
+ 2597 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 293 77
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 299 40
+.src_ref 2 "reduce_base_c8.h" 300 59
+.src_ref 2 "reduce_base_c8.h" 326 79
+ 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2599 "01111000" // /* MW 9 */
+ 2600 "01100000" // /* MW 8 */
+ 2601 "00001000" // /* MW 7 */
+ 2602 "10101000" // /* MW 6 */
+ 2603 "00010111" // /* MW 5 */
+ 2604 "00111110" // /* MW 4 */
+ 2605 "00000000" // /* MW 3 */
+ 2606 "01111110" // /* MW 2 */
+ 2607 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57
+.src_ref 2 "reduce_base_c8.h" 301 81
+.src_ref 2 "reduce_base_c8.h" 305 77
+ 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2609 "00010000" // /* MW 9 */
+ 2610 "11111100" // /* MW 8 */
+ 2611 "10001111" // /* MW 7 */
+ 2612 "00111100" // /* MW 6 */
+ 2613 "00000000" // /* MW 5 */
+ 2614 "00000000" // /* MW 4 */
+ 2615 "00000000" // /* MW 3 */
+ 2616 "11100101" // /* MW 2 */
+ 2617 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 218
+.src_ref 2 "reduce_base_c8.h" 280 76
+.src_ref 2 "reduce_base_c8.h" 312 98
+ 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2619 "01110000" // /* MW 9 */
+ 2620 "00000000" // /* MW 8 */
+ 2621 "00000000" // /* MW 7 */
+ 2622 "00000000" // /* MW 6 */
+ 2623 "00000010" // /* MW 5 */
+ 2624 "00000000" // /* MW 4 */
+ 2625 "00000000" // /* MW 3 */
+ 2626 "10010000" // /* MW 2 */
+ 2627 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+ 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2629 "00010111" // /* MW 3 */
+ 2630 "01100000" // /* MW 2 */
+ 2631 "00011100" // /* MW 1 */
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 220 25 first
+ 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "01110001" // /* MW 3 */
+ 2636 "00011100" // /* MW 2 */
+ 2637 "00001000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 28 first
+ 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "01010110" // /* MW 3 */
+ 2640 "00011111" // /* MW 2 */
+ 2641 "00000001" // /* MW 1 */
+ 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2643 "00000000" // /* MW 1 */
+ 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2645 "00000000" // /* MW 1 */
+ 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2647 "00000000" // /* MW 1 */
+ 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2649 "00000000" // /* MW 1 */
+ 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2651 "00000000" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 26
+.src_ref 2 "reduce_base_c8.h" 301 81 first
+ 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2655 "10001001" // /* MW 5 */
+ 2656 "01000100" // /* MW 4 */
+ 2657 "00111101" // /* MW 3 */
+ 2658 "11101010" // /* MW 2 */
+ 2659 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 26 first
+.src_ref 2 "reduce_base_c8.h" 293 58 first
+.src_ref 2 "reduce_base_c8.h" 301 81
+ 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2661 "10101000" // /* MW 9 */
+ 2662 "01001000" // /* MW 8 */
+ 2663 "11001100" // /* MW 7 */
+ 2664 "01111110" // /* MW 6 */
+ 2665 "01001101" // /* MW 5 */
+ 2666 "00000110" // /* MW 4 */
+ 2667 "11010000" // /* MW 3 */
+ 2668 "11110110" // /* MW 2 */
+ 2669 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 63 first
+ 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2671 "01101101" // /* MW 3 */
+ 2672 "10100100" // /* MW 2 */
+ 2673 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 77 first
+ 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2675 "00011101" // /* MW 3 */
+ 2676 "00001100" // /* MW 2 */
+ 2677 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 41 first
+.src_ref 2 "reduce_base_c8.h" 300 59 first
+ 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2679 "11111111" // /* MW 5 */
+ 2680 "10110010" // /* MW 4 */
+ 2681 "10110000" // /* MW 3 */
+ 2682 "01000011" // /* MW 2 */
+ 2683 "11010100" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 24 first
+.src_ref 2 "reduce_base_c8.h" 287 40 first
+ 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2691 "01001100" // /* MW 5 */
+ 2692 "10011100" // /* MW 4 */
+ 2693 "00111110" // /* MW 3 */
+ 2694 "11110110" // /* MW 2 */
+ 2695 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 29 first
+.src_ref 2 "reduce_base_c8.h" 312 60 first
+ 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2697 "10101111" // /* MW 9 */
+ 2698 "01001001" // /* MW 8 */
+ 2699 "00000111" // /* MW 7 */
+ 2700 "10000000" // /* MW 6 */
+ 2701 "10110101" // /* MW 5 */
+ 2702 "11111111" // /* MW 4 */
+ 2703 "11010111" // /* MW 3 */
+ 2704 "10001010" // /* MW 2 */
+ 2705 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 57 first
+ 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11011111" // /* MW 3 */
+ 2708 "11101001" // /* MW 2 */
+ 2709 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 78 first
+ 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2711 "01001111" // /* MW 3 */
+ 2712 "11111000" // /* MW 2 */
+ 2713 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 40 first
+ 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2715 "11101101" // /* MW 3 */
+ 2716 "01101011" // /* MW 2 */
+ 2717 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57 first
+.src_ref 2 "reduce_base_c8.h" 299 40
+ 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2719 "11010000" // /* MW 5 */
+ 2720 "10110101" // /* MW 4 */
+ 2721 "10111101" // /* MW 3 */
+ 2722 "10001011" // /* MW 2 */
+ 2723 "11101100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 41
+ 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "11111111" // /* MW 3 */
+ 2726 "10101111" // /* MW 2 */
+ 2727 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 85 first
+ 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "01101111" // /* MW 3 */
+ 2730 "01111011" // /* MW 2 */
+ 2731 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 27 first
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2733 "01010101" // /* MW 5 */
+ 2734 "01100000" // /* MW 4 */
+ 2735 "00111111" // /* MW 3 */
+ 2736 "10001010" // /* MW 2 */
+ 2737 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 33 first
+ 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2739 "00101110" // /* MW 3 */
+ 2740 "00011100" // /* MW 2 */
+ 2741 "00000001" // /* MW 1 */
+ 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2743 "00000000" // /* MW 1 */
+ 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2745 "00000000" // /* MW 1 */
+ 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2747 "00000000" // /* MW 1 */
+ 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2749 "00000000" // /* MW 1 */
+ 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2751 "00000000" // /* MW 1 */
+ 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2753 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 31
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2755 "01110000" // /* MW 7 */
+ 2756 "00001110" // /* MW 6 */
+ 2757 "11110000" // /* MW 5 */
+ 2758 "00000011" // /* MW 4 */
+ 2759 "00110000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 34 first
+ 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2763 "00001110" // /* MW 3 */
+ 2764 "00000100" // /* MW 2 */
+ 2765 "00000001" // /* MW 1 */
+ 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2767 "00000000" // /* MW 1 */
+ 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2769 "00000000" // /* MW 1 */
+ 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2771 "00000000" // /* MW 1 */
+ 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2773 "00000000" // /* MW 1 */
+ 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2775 "00000000" // /* MW 1 */
+ 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2777 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 32
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2779 "01110000" // /* MW 7 */
+ 2780 "10001110" // /* MW 6 */
+ 2781 "00110000" // /* MW 5 */
+ 2782 "00000011" // /* MW 4 */
+ 2783 "00110000" // /* MW 3 */
+ 2784 "10000001" // /* MW 2 */
+ 2785 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 32 first
+ 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2787 "11010110" // /* MW 3 */
+ 2788 "00010111" // /* MW 2 */
+ 2789 "00000001" // /* MW 1 */
+ 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2791 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */
+ 2793 "00000001" // /* MW 5 */
+ 2794 "01000000" // /* MW 4 */
+ 2795 "10110000" // /* MW 3 */
+ 2796 "00000101" // /* MW 2 */
+ 2797 "11000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 98 first
+.delay_slot
+ 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2799 "00001101" // /* MW 3 */
+ 2800 "00100111" // /* MW 2 */
+ 2801 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 318 64 first
+.delay_slot
+ 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2803 "10011111" // /* MW 3 */
+ 2804 "11110011" // /* MW 2 */
+ 2805 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 41 first
+.src_ref 2 "reduce_base_c8.h" 305 77 first
+.delay_slot
+ 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "11111111" // /* MW 5 */
+ 2808 "10110001" // /* MW 4 */
+ 2809 "10110010" // /* MW 3 */
+ 2810 "00001011" // /* MW 2 */
+ 2811 "10100101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 76 first
+.delay_slot
+ 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2813 "00001101" // /* MW 3 */
+ 2814 "00100001" // /* MW 2 */
+ 2815 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 30 first
+.src_ref 2 "reduce_base_c8.h" 318 88 first
+.delay_slot
+ 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2817 "11011111" // /* MW 5 */
+ 2818 "11111111" // /* MW 4 */
+ 2819 "00111100" // /* MW 3 */
+ 2820 "11111010" // /* MW 2 */
+ 2821 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00000101" // /* MW 3 */
+ 2824 "00111000" // /* MW 2 */
+ 2825 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "11000111" // /* MW 3 */
+ 2828 "10111001" // /* MW 2 */
+ 2829 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */
+ 2831 "00000001" // /* MW 5 */
+ 2832 "01000000" // /* MW 4 */
+ 2833 "11100000" // /* MW 3 */
+ 2834 "00000111" // /* MW 2 */
+ 2835 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2843 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 41 first
+.delay_slot
+ 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "11111111" // /* MW 3 */
+ 2846 "11101101" // /* MW 2 */
+ 2847 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00001001" // /* MW 3 */
+ 2850 "00100010" // /* MW 2 */
+ 2851 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00100111" // /* MW 3 */
+ 2854 "01100010" // /* MW 2 */
+ 2855 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */
+ 2857 "00000001" // /* MW 5 */
+ 2858 "01000000" // /* MW 4 */
+ 2859 "10100000" // /* MW 3 */
+ 2860 "00000111" // /* MW 2 */
+ 2861 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2869 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.delay_slot
+ 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001101" // /* MW 3 */
+ 2872 "00001110" // /* MW 2 */
+ 2873 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00100111" // /* MW 3 */
+ 2876 "11000100" // /* MW 2 */
+ 2877 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */
+ 2879 "00000001" // /* MW 5 */
+ 2880 "01000000" // /* MW 4 */
+ 2881 "01010000" // /* MW 3 */
+ 2882 "00000111" // /* MW 2 */
+ 2883 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2893 "00000000" // /* MW 1 */
+ 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */
+ 2895 "00000000" // /* MW 5 */
+ 2896 "00000000" // /* MW 4 */
+ 2897 "11110000" // /* MW 3 */
+ 2898 "00000110" // /* MW 2 */
+ 2899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "00010001" // /* MW 3 */
+ 2902 "00110100" // /* MW 2 */
+ 2903 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2907 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2909 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2911 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "00010101" // /* MW 3 */
+ 2914 "00111010" // /* MW 2 */
+ 2915 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "00101010" // /* MW 3 */
+ 2918 "01110000" // /* MW 2 */
+ 2919 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */
+ 2921 "00000001" // /* MW 5 */
+ 2922 "01000000" // /* MW 4 */
+ 2923 "01010000" // /* MW 3 */
+ 2924 "00000110" // /* MW 2 */
+ 2925 "11000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2933 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 316 38
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2935 "00010001" // /* MW 3 */
+ 2936 "00110100" // /* MW 2 */
+ 2937 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2939 "00100111" // /* MW 3 */
+ 2940 "10100010" // /* MW 2 */
+ 2941 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */
+ 2943 "00000001" // /* MW 5 */
+ 2944 "01000000" // /* MW 4 */
+ 2945 "00010000" // /* MW 3 */
+ 2946 "00000110" // /* MW 2 */
+ 2947 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2953 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2957 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00101000" // /* MW 3 */
+ 2960 "01000100" // /* MW 2 */
+ 2961 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 2963 "00000001" // /* MW 5 */
+ 2964 "01000000" // /* MW 4 */
+ 2965 "11110000" // /* MW 3 */
+ 2966 "00000110" // /* MW 2 */
+ 2967 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2969 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2971 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2977 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 286 44 first
+.src_ref 2 "reduce_base_c8.h" 289 38
+.src_ref 2 "reduce_base_c8.h" 291 40
+.src_ref 2 "reduce_base_c8.h" 291 40
+ 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2979 "01011000" // /* MW 9 */
+ 2980 "11101100" // /* MW 8 */
+ 2981 "00000111" // /* MW 7 */
+ 2982 "00001000" // /* MW 6 */
+ 2983 "00100010" // /* MW 5 */
+ 2984 "00000000" // /* MW 4 */
+ 2985 "11100000" // /* MW 3 */
+ 2986 "11010110" // /* MW 2 */
+ 2987 "10000011" // /* MW 1 */
+ 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "10100000" // /* MW 3 */
+ 2990 "10011100" // /* MW 2 */
+ 2991 "00011111" // /* MW 1 */
+ 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2993 "00000000" // /* MW 1 */
+ 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2995 "00000000" // /* MW 1 */
+ 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2997 "00000000" // /* MW 1 */
+ 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2999 "00000000" // /* MW 1 */
+ 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 38 first
+ 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3003 "11110111" // /* MW 3 */
+ 3004 "00011100" // /* MW 2 */
+ 3005 "00000100" // /* MW 1 */
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+ 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3017 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 288 39 first
+ 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3019 "11110111" // /* MW 3 */
+ 3020 "00011110" // /* MW 2 */
+ 3021 "00000100" // /* MW 1 */
+ 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3023 "00000000" // /* MW 1 */
+ 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3025 "00000000" // /* MW 1 */
+ 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3027 "00000000" // /* MW 1 */
+ 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3029 "00000000" // /* MW 1 */
+ 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3031 "00000000" // /* MW 1 */
+ 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3033 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 289 38 first
+ 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3035 "01010111" // /* MW 3 */
+ 3036 "00011100" // /* MW 2 */
+ 3037 "00000100" // /* MW 1 */
+ 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3039 "00000000" // /* MW 1 */
+ 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3041 "00000000" // /* MW 1 */
+ 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3043 "00000000" // /* MW 1 */
+ 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3045 "00000000" // /* MW 1 */
+ 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3047 "00000000" // /* MW 1 */
+ 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3049 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 39 first
+ 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3051 "00110111" // /* MW 3 */
+ 3052 "00011100" // /* MW 2 */
+ 3053 "00000100" // /* MW 1 */
+ 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3055 "00000000" // /* MW 1 */
+ 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3057 "00000000" // /* MW 1 */
+ 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3059 "00000000" // /* MW 1 */
+ 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3061 "00000000" // /* MW 1 */
+ 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3063 "00000000" // /* MW 1 */
+ 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3065 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 291 40 first
+ 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3067 "01010111" // /* MW 3 */
+ 3068 "00001000" // /* MW 2 */
+ 3069 "00000100" // /* MW 1 */
+ 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3071 "00000000" // /* MW 1 */
+ 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3073 "00000000" // /* MW 1 */
+ 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3075 "00000000" // /* MW 5 */
+ 3076 "00000000" // /* MW 4 */
+ 3077 "11101000" // /* MW 3 */
+ 3078 "00000110" // /* MW 2 */
+ 3079 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3081 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3083 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3085 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 292 38 first
+.delay_slot
+ 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3087 "01010001" // /* MW 3 */
+ 3088 "00000110" // /* MW 2 */
+ 3089 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 38 first
+.delay_slot
+ 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3091 "00011100" // /* MW 13 */
+ 3092 "00000000" // /* MW 12 */
+ 3093 "00000000" // /* MW 11 */
+ 3094 "01010111" // /* MW 10 */
+ 3095 "00011010" // /* MW 9 */
+ 3096 "01000000" // /* MW 8 */
+ 3097 "00000000" // /* MW 7 */
+ 3098 "00000000" // /* MW 6 */
+ 3099 "10100011" // /* MW 5 */
+ 3100 "00101001" // /* MW 4 */
+ 3101 "11111000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+.src_ref 2 "reduce_base_c8.h" 274 44 first
+.src_ref 2 "reduce_base_c8.h" 275 40
+.src_ref 2 "reduce_base_c8.h" 275 40
+ 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00010000" // /* MW 8 */
+ 3107 "01001000" // /* MW 7 */
+ 3108 "10101000" // /* MW 6 */
+ 3109 "01100111" // /* MW 5 */
+ 3110 "00111110" // /* MW 4 */
+ 3111 "11100000" // /* MW 3 */
+ 3112 "10010010" // /* MW 2 */
+ 3113 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 40 first
+.src_ref 2 "reduce_base_c8.h" 279 40
+ 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01111000" // /* MW 9 */
+ 3116 "00001110" // /* MW 8 */
+ 3117 "11010000" // /* MW 7 */
+ 3118 "00110011" // /* MW 6 */
+ 3119 "00100010" // /* MW 5 */
+ 3120 "00001100" // /* MW 4 */
+ 3121 "10000000" // /* MW 3 */
+ 3122 "10000000" // /* MW 2 */
+ 3123 "11111101" // /* MW 1 */
+ 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3125 "00000000" // /* MW 1 */
+ 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3127 "00000000" // /* MW 1 */
+ 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3129 "00000000" // /* MW 1 */
+ 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3131 "00000000" // /* MW 1 */
+ 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38
+ 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3135 "01010111" // /* MW 3 */
+ 3136 "00011100" // /* MW 2 */
+ 3137 "00000100" // /* MW 1 */
+ 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3139 "00000000" // /* MW 1 */
+ 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3141 "00000000" // /* MW 1 */
+ 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3143 "00000000" // /* MW 1 */
+ 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3145 "00000000" // /* MW 1 */
+ 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3147 "00000000" // /* MW 1 */
+ 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 39 first
+ 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3151 "11110111" // /* MW 3 */
+ 3152 "00011110" // /* MW 2 */
+ 3153 "00000100" // /* MW 1 */
+ 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3155 "00000000" // /* MW 1 */
+ 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3157 "00000000" // /* MW 1 */
+ 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3159 "00000000" // /* MW 1 */
+ 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3161 "00000000" // /* MW 1 */
+ 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3163 "00000000" // /* MW 1 */
+ 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3165 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38 first
+.src_ref 2 "reduce_base_c8.h" 277 38 first
+ 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3167 "01010111" // /* MW 3 */
+ 3168 "00011100" // /* MW 2 */
+ 3169 "00000100" // /* MW 1 */
+ 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3171 "00000000" // /* MW 1 */
+ 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3173 "00000000" // /* MW 1 */
+ 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3175 "00000000" // /* MW 1 */
+ 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3177 "00000000" // /* MW 1 */
+ 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3179 "00000000" // /* MW 1 */
+ 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3181 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 278 39 first
+ 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3183 "10110111" // /* MW 3 */
+ 3184 "00011100" // /* MW 2 */
+ 3185 "00000100" // /* MW 1 */
+ 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3187 "00000000" // /* MW 1 */
+ 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3189 "00000000" // /* MW 1 */
+ 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3191 "00000000" // /* MW 1 */
+ 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3193 "00000000" // /* MW 1 */
+ 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3195 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3197 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3199 "00110111" // /* MW 3 */
+ 3200 "00001000" // /* MW 2 */
+ 3201 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3205 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3207 "00000000" // /* MW 5 */
+ 3208 "00000000" // /* MW 4 */
+ 3209 "11101000" // /* MW 3 */
+ 3210 "00000110" // /* MW 2 */
+ 3211 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3213 "01000001" // /* MW 3 */
+ 3214 "00000010" // /* MW 2 */
+ 3215 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3219 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 281 38 first
+.delay_slot
+ 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3221 "01110001" // /* MW 3 */
+ 3222 "00010100" // /* MW 2 */
+ 3223 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 38 first
+.delay_slot
+ 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3225 "01110000" // /* MW 7 */
+ 3226 "10100101" // /* MW 6 */
+ 3227 "00000001" // /* MW 5 */
+ 3228 "00000000" // /* MW 4 */
+ 3229 "00110000" // /* MW 3 */
+ 3230 "11000010" // /* MW 2 */
+ 3231 "10000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 302 76
+ 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3233 "00110010" // /* MW 5 */
+ 3234 "00010000" // /* MW 4 */
+ 3235 "00100000" // /* MW 3 */
+ 3236 "10001110" // /* MW 2 */
+ 3237 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3239 "01000111" // /* MW 3 */
+ 3240 "10001000" // /* MW 2 */
+ 3241 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */
+ 3243 "00000001" // /* MW 5 */
+ 3244 "01000000" // /* MW 4 */
+ 3245 "10101000" // /* MW 3 */
+ 3246 "00000110" // /* MW 2 */
+ 3247 "00100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 310 44
+.src_ref 2 "reduce_base_c8.h" 311 38
+.delay_slot
+ 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "01000001" // /* MW 3 */
+ 3250 "00000010" // /* MW 2 */
+ 3251 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3259 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3261 "00011101" // /* MW 3 */
+ 3262 "00000110" // /* MW 2 */
+ 3263 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3265 "00101000" // /* MW 3 */
+ 3266 "11000100" // /* MW 2 */
+ 3267 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 3269 "00000001" // /* MW 5 */
+ 3270 "01000000" // /* MW 4 */
+ 3271 "11110000" // /* MW 3 */
+ 3272 "00000110" // /* MW 2 */
+ 3273 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3275 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3283 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 310 44 first
+.src_ref 2 "reduce_base_c8.h" 312 41 first
+.src_ref 2 "reduce_base_c8.h" 315 40
+ 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3285 "01011000" // /* MW 9 */
+ 3286 "11101100" // /* MW 8 */
+ 3287 "00000111" // /* MW 7 */
+ 3288 "11111000" // /* MW 6 */
+ 3289 "00101111" // /* MW 5 */
+ 3290 "00100110" // /* MW 4 */
+ 3291 "11100000" // /* MW 3 */
+ 3292 "10000110" // /* MW 2 */
+ 3293 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38
+.src_ref 2 "reduce_base_c8.h" 317 97
+ 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3295 "00010000" // /* MW 9 */
+ 3296 "00000000" // /* MW 8 */
+ 3297 "01000000" // /* MW 7 */
+ 3298 "01000000" // /* MW 6 */
+ 3299 "00000000" // /* MW 5 */
+ 3300 "00000000" // /* MW 4 */
+ 3301 "00000000" // /* MW 3 */
+ 3302 "01000011" // /* MW 2 */
+ 3303 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40
+.src_ref 2 "reduce_base_c8.h" 317 97 first
+ 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3305 "00000001" // /* MW 5 */
+ 3306 "00100000" // /* MW 4 */
+ 3307 "10111100" // /* MW 3 */
+ 3308 "11000111" // /* MW 2 */
+ 3309 "11100000" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+ 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3317 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 311 38 first
+ 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3319 "00110111" // /* MW 3 */
+ 3320 "00011100" // /* MW 2 */
+ 3321 "00000100" // /* MW 1 */
+ 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3323 "00000000" // /* MW 1 */
+ 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3325 "00000000" // /* MW 1 */
+ 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3327 "00000000" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 39 first
+ 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3335 "01010111" // /* MW 3 */
+ 3336 "00011100" // /* MW 2 */
+ 3337 "00000100" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+ 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3341 "00000000" // /* MW 1 */
+ 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3343 "00000000" // /* MW 1 */
+ 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3345 "00000000" // /* MW 1 */
+ 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3347 "00000000" // /* MW 1 */
+ 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3349 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38 first
+ 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3351 "01000001" // /* MW 3 */
+ 3352 "00011100" // /* MW 2 */
+ 3353 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40 first
+ 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3355 "00010111" // /* MW 3 */
+ 3356 "00001011" // /* MW 2 */
+ 3357 "00000100" // /* MW 1 */
+ 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3359 "00000000" // /* MW 1 */
+ 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3361 "00000000" // /* MW 1 */
+ 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3363 "00000000" // /* MW 1 */
+ 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3365 "00000000" // /* MW 1 */
+ 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3367 "00000000" // /* MW 1 */
+ 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3369 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 316 38 first
+ 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3371 "01010001" // /* MW 3 */
+ 3372 "00000111" // /* MW 2 */
+ 3373 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 317 38 first
+ 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3375 "01110001" // /* MW 3 */
+ 3376 "00010100" // /* MW 2 */
+ 3377 "00001100" // /* MW 1 */
+ 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3379 "00000000" // /* MW 5 */
+ 3380 "00000000" // /* MW 4 */
+ 3381 "11101000" // /* MW 3 */
+ 3382 "00000110" // /* MW 2 */
+ 3383 "00000000" // /* MW 1 */
+.delay_slot
+ 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3385 "10100000" // /* MW 3 */
+ 3386 "10011111" // /* MW 2 */
+ 3387 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3395 "00011100" // /* MW 13 */
+ 3396 "00000000" // /* MW 12 */
+ 3397 "00000000" // /* MW 11 */
+ 3398 "01010111" // /* MW 10 */
+ 3399 "00011010" // /* MW 9 */
+ 3400 "01000000" // /* MW 8 */
+ 3401 "00000000" // /* MW 7 */
+ 3402 "00000000" // /* MW 6 */
+ 3403 "10110110" // /* MW 5 */
+ 3404 "00000010" // /* MW 4 */
+ 3405 "11110000" // /* MW 3 */
+ 3406 "00101100" // /* MW 2 */
+ 3407 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 301 40 first
+ 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3409 "01011000" // /* MW 9 */
+ 3410 "00010000" // /* MW 8 */
+ 3411 "01001000" // /* MW 7 */
+ 3412 "01110000" // /* MW 6 */
+ 3413 "00101011" // /* MW 5 */
+ 3414 "00000110" // /* MW 4 */
+ 3415 "11100000" // /* MW 3 */
+ 3416 "10000110" // /* MW 2 */
+ 3417 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 306 62
+ 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3419 "01111000" // /* MW 9 */
+ 3420 "00001110" // /* MW 8 */
+ 3421 "11010000" // /* MW 7 */
+ 3422 "10101000" // /* MW 6 */
+ 3423 "01000111" // /* MW 5 */
+ 3424 "00111110" // /* MW 4 */
+ 3425 "10000000" // /* MW 3 */
+ 3426 "10000000" // /* MW 2 */
+ 3427 "11111101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76 first
+ 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3429 "01001101" // /* MW 3 */
+ 3430 "11001000" // /* MW 2 */
+ 3431 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 41
+.src_ref 2 "reduce_base_c8.h" 306 62 first
+ 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3433 "11111111" // /* MW 5 */
+ 3434 "10100100" // /* MW 4 */
+ 3435 "11110001" // /* MW 3 */
+ 3436 "10001101" // /* MW 2 */
+ 3437 "11110111" // /* MW 1 */
+ 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3439 "00000000" // /* MW 1 */
+ 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3441 "00000000" // /* MW 1 */
+ 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3443 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 38 first
+ 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3445 "01110111" // /* MW 3 */
+ 3446 "00011111" // /* MW 2 */
+ 3447 "00000100" // /* MW 1 */
+ 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3449 "00000000" // /* MW 1 */
+ 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3451 "00000000" // /* MW 1 */
+ 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3453 "00000000" // /* MW 1 */
+ 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3455 "00000000" // /* MW 1 */
+ 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3457 "00000000" // /* MW 1 */
+ 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3459 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 39 first
+ 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3461 "10110111" // /* MW 3 */
+ 3462 "00011100" // /* MW 2 */
+ 3463 "00000100" // /* MW 1 */
+ 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3465 "00000000" // /* MW 1 */
+ 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3467 "00000000" // /* MW 1 */
+ 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3469 "00000000" // /* MW 1 */
+ 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3471 "00000000" // /* MW 1 */
+ 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3473 "00000000" // /* MW 1 */
+ 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3475 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 38 first
+ 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3477 "01010111" // /* MW 3 */
+ 3478 "00011100" // /* MW 2 */
+ 3479 "00000100" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+ 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3491 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 39 first
+ 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3493 "01110111" // /* MW 3 */
+ 3494 "00011100" // /* MW 2 */
+ 3495 "00000100" // /* MW 1 */
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+ 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3507 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 303 40 first
+ 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3509 "00110111" // /* MW 3 */
+ 3510 "00001000" // /* MW 2 */
+ 3511 "00000100" // /* MW 1 */
+ 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3513 "00000000" // /* MW 1 */
+ 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3515 "00000000" // /* MW 1 */
+ 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3517 "00000000" // /* MW 1 */
+ 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3519 "00000000" // /* MW 1 */
+ 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3521 "00000000" // /* MW 1 */
+ 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3523 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 304 38 first
+ 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3525 "00110001" // /* MW 3 */
+ 3526 "00000110" // /* MW 2 */
+ 3527 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 38 first
+ 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3529 "01110000" // /* MW 7 */
+ 3530 "10100101" // /* MW 6 */
+ 3531 "00000001" // /* MW 5 */
+ 3532 "00000000" // /* MW 4 */
+ 3533 "00110000" // /* MW 3 */
+ 3534 "11010010" // /* MW 2 */
+ 3535 "10000010" // /* MW 1 */
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3537 "01000000" // /* MW 3 */
+ 3538 "10000000" // /* MW 2 */
+ 3539 "00011000" // /* MW 1 */
+ 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3541 "10000001" // /* MW 11 */
+ 3542 "10101101" // /* MW 10 */
+ 3543 "00000000" // /* MW 9 */
+ 3544 "00000000" // /* MW 8 */
+ 3545 "00000000" // /* MW 7 */
+ 3546 "00000000" // /* MW 6 */
+ 3547 "00100000" // /* MW 5 */
+ 3548 "00000000" // /* MW 4 */
+ 3549 "11100000" // /* MW 3 */
+ 3550 "01111010" // /* MW 2 */
+ 3551 "01100000" // /* MW 1 */
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 326 79 first
+.src_ref 2 "reduce_base_c8.h" 329 51
+ 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3553 "00011110" // /* MW 5 */
+ 3554 "11000000" // /* MW 4 */
+ 3555 "10000000" // /* MW 3 */
+ 3556 "00001000" // /* MW 2 */
+ 3557 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 26
+.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first
+ 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3559 "01011000" // /* MW 9 */
+ 3560 "11100010" // /* MW 8 */
+ 3561 "00000111" // /* MW 7 */
+ 3562 "00001000" // /* MW 6 */
+ 3563 "00000010" // /* MW 5 */
+ 3564 "00000000" // /* MW 4 */
+ 3565 "11010000" // /* MW 3 */
+ 3566 "10001010" // /* MW 2 */
+ 3567 "01000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3569 "10011001" // /* MW 5 */
+ 3570 "00000000" // /* MW 4 */
+ 3571 "01010010" // /* MW 3 */
+ 3572 "10001110" // /* MW 2 */
+ 3573 "01000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first
+ 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3575 "00110110" // /* MW 3 */
+ 3576 "00010100" // /* MW 2 */
+ 3577 "00000010" // /* MW 1 */
+ 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3579 "00000000" // /* MW 1 */
+ 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3581 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 326 28 first
+ 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3583 "11110111" // /* MW 3 */
+ 3584 "00101111" // /* MW 2 */
+ 3585 "00000000" // /* MW 1 */
+ 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3587 "00000000" // /* MW 1 */
+ 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3589 "00000000" // /* MW 1 */
+ 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3591 "00000000" // /* MW 1 */
+ 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3593 "00000000" // /* MW 1 */
+ 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3595 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3597 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3599 "00010111" // /* MW 3 */
+ 3600 "01011111" // /* MW 2 */
+ 3601 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3603 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3605 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3607 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3609 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31
+.src_ref 2 "reduce_base_c8.h" 328 23
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00000001" // /* MW 3 */
+ 3612 "00110000" // /* MW 2 */
+ 3613 "00010000" // /* MW 1 */
+ 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3615 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 328 23 first
+ 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3617 "00010111" // /* MW 3 */
+ 3618 "11001111" // /* MW 2 */
+ 3619 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 51 first
+ 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3621 "10011010" // /* MW 3 */
+ 3622 "01001000" // /* MW 2 */
+ 3623 "00000000" // /* MW 1 */
+ 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3625 "00000000" // /* MW 1 */
+ 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3627 "00000000" // /* MW 1 */
+ 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3629 "00000000" // /* MW 1 */
+ 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 28
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3635 "00010111" // /* MW 3 */
+ 3636 "11111100" // /* MW 2 */
+ 3637 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 28
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3639 "11111111" // /* MW 5 */
+ 3640 "10100100" // /* MW 4 */
+ 3641 "10110010" // /* MW 3 */
+ 3642 "00110101" // /* MW 2 */
+ 3643 "00100001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3645 "01000001" // /* MW 3 */
+ 3646 "00000000" // /* MW 2 */
+ 3647 "00010000" // /* MW 1 */
+ 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3649 "00000000" // /* MW 1 */
+ 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3651 "00000000" // /* MW 1 */
+ 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3653 "00000000" // /* MW 1 */
+ 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3655 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 330 26 first
+ 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3657 "10110111" // /* MW 3 */
+ 3658 "00001000" // /* MW 2 */
+ 3659 "00000000" // /* MW 1 */
+ 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3661 "00000000" // /* MW 1 */
+ 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3663 "00000000" // /* MW 1 */
+ 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3665 "00000000" // /* MW 1 */
+ 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3667 "00000000" // /* MW 1 */
+ 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3669 "00000000" // /* MW 1 */
+ 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3671 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24 first
+ 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3673 "01110111" // /* MW 3 */
+ 3674 "00101010" // /* MW 2 */
+ 3675 "00000000" // /* MW 1 */
+ 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3677 "00000000" // /* MW 1 */
+ 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3679 "00000000" // /* MW 1 */
+ 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3681 "00000000" // /* MW 1 */
+ 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3683 "00000000" // /* MW 1 */
+ 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3685 "00000000" // /* MW 1 */
+ 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3687 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first
+ 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3689 "01000111" // /* MW 3 */
+ 3690 "11101100" // /* MW 2 */
+ 3691 "00000000" // /* MW 1 */
+ 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3693 "00000000" // /* MW 1 */
+ 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3695 "00000000" // /* MW 1 */
+ 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3697 "00000000" // /* MW 1 */
+ 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3699 "00000000" // /* MW 1 */
+ 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3701 "00000000" // /* MW 1 */
+ 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3703 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3705 "01110111" // /* MW 3 */
+ 3706 "00000100" // /* MW 2 */
+ 3707 "00000000" // /* MW 1 */
+ 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3709 "00000000" // /* MW 1 */
+ 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3711 "00000000" // /* MW 1 */
+ 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3713 "00000000" // /* MW 1 */
+ 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3715 "00000000" // /* MW 1 */
+ 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3717 "00000000" // /* MW 1 */
+ 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3719 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first
+ 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3721 "00100111" // /* MW 3 */
+ 3722 "11100100" // /* MW 2 */
+ 3723 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4 first
+ 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 3725 "00000000" // /* MW 3 */
+ 3726 "00101000" // /* MW 2 */
+ 3727 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4
+.delay_slot
+ 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3729 "00000001" // /* MW 5 */
+ 3730 "00000000" // /* MW 4 */
+ 3731 "00000000" // /* MW 3 */
+ 3732 "11111000" // /* MW 2 */
+ 3733 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3735 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3737 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3739 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3741 "01100111" // /* MW 3 */
+ 3742 "00000001" // /* MW 2 */
+ 3743 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+.src_ref 2 "reduce_base_c8.h" 262 44 first
+.src_ref 2 "reduce_base_c8.h" 263 77
+ 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3745 "00010000" // /* MW 9 */
+ 3746 "11110100" // /* MW 8 */
+ 3747 "10101111" // /* MW 7 */
+ 3748 "00111100" // /* MW 6 */
+ 3749 "00000000" // /* MW 5 */
+ 3750 "00000000" // /* MW 4 */
+ 3751 "11100000" // /* MW 3 */
+ 3752 "11010110" // /* MW 2 */
+ 3753 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.src_ref 2 "reduce_base_c8.h" 263 77 first
+.src_ref 2 "reduce_base_c8.h" 267 40
+ 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3755 "01011000" // /* MW 9 */
+ 3756 "11101100" // /* MW 8 */
+ 3757 "00000111" // /* MW 7 */
+ 3758 "00000100" // /* MW 6 */
+ 3759 "01111101" // /* MW 5 */
+ 3760 "00001010" // /* MW 4 */
+ 3761 "00100000" // /* MW 3 */
+ 3762 "10001010" // /* MW 2 */
+ 3763 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3765 "10010000" // /* MW 9 */
+ 3766 "11111111" // /* MW 8 */
+ 3767 "11001111" // /* MW 7 */
+ 3768 "00111100" // /* MW 6 */
+ 3769 "00000000" // /* MW 5 */
+ 3770 "00000000" // /* MW 4 */
+ 3771 "00000000" // /* MW 3 */
+ 3772 "10011010" // /* MW 2 */
+ 3773 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118 first
+ 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3775 "01100000" // /* MW 3 */
+ 3776 "11100010" // /* MW 2 */
+ 3777 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 98
+.src_ref 2 "reduce_base_c8.h" 267 116 first
+ 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3779 "01000110" // /* MW 3 */
+ 3780 "01111010" // /* MW 2 */
+ 3781 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 60 first
+.src_ref 2 "reduce_base_c8.h" 265 98 first
+ 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3783 "01001110" // /* MW 3 */
+ 3784 "01101010" // /* MW 2 */
+ 3785 "00010100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3787 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 38 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3789 "01010111" // /* MW 3 */
+ 3790 "00011100" // /* MW 2 */
+ 3791 "00000100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3793 "00101111" // /* MW 3 */
+ 3794 "11000100" // /* MW 2 */
+ 3795 "00010001" // /* MW 1 */
+ 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3797 "00000000" // /* MW 1 */
+ 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3799 "00000000" // /* MW 1 */
+ 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3801 "00000000" // /* MW 1 */
+ 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3803 "00000000" // /* MW 1 */
+ 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3805 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 264 39 first
+ 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3807 "11010111" // /* MW 3 */
+ 3808 "00011110" // /* MW 2 */
+ 3809 "00000100" // /* MW 1 */
+ 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3811 "00000000" // /* MW 1 */
+ 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3813 "00000000" // /* MW 1 */
+ 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3815 "00000000" // /* MW 1 */
+ 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3817 "00000000" // /* MW 1 */
+ 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3819 "00000000" // /* MW 1 */
+ 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3821 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 38 first
+ 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3823 "10110111" // /* MW 3 */
+ 3824 "00011110" // /* MW 2 */
+ 3825 "00000100" // /* MW 1 */
+ 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3827 "00000000" // /* MW 1 */
+ 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3829 "00000000" // /* MW 1 */
+ 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3831 "00000000" // /* MW 1 */
+ 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3833 "00000000" // /* MW 1 */
+ 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3835 "00000000" // /* MW 1 */
+ 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3837 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 266 39 first
+ 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3839 "00110111" // /* MW 3 */
+ 3840 "00011100" // /* MW 2 */
+ 3841 "00000100" // /* MW 1 */
+ 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3843 "00000000" // /* MW 1 */
+ 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3845 "00000000" // /* MW 1 */
+ 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3847 "00000000" // /* MW 1 */
+ 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3849 "00000000" // /* MW 1 */
+ 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3851 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3853 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 40 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3855 "01010111" // /* MW 3 */
+ 3856 "00001000" // /* MW 2 */
+ 3857 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3859 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3861 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3863 "00000000" // /* MW 5 */
+ 3864 "00000000" // /* MW 4 */
+ 3865 "11101000" // /* MW 3 */
+ 3866 "00000110" // /* MW 2 */
+ 3867 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3869 "01000001" // /* MW 3 */
+ 3870 "00001010" // /* MW 2 */
+ 3871 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3873 "11010001" // /* MW 3 */
+ 3874 "01000101" // /* MW 2 */
+ 3875 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 270 64
+.delay_slot
+ 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3877 "00011100" // /* MW 3 */
+ 3878 "10100001" // /* MW 2 */
+ 3879 "00011001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 268 38 first
+.delay_slot
+ 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3881 "01110000" // /* MW 7 */
+ 3882 "10100101" // /* MW 6 */
+ 3883 "00000001" // /* MW 5 */
+ 3884 "00000000" // /* MW 4 */
+ 3885 "00110000" // /* MW 3 */
+ 3886 "10001110" // /* MW 2 */
+ 3887 "10000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 269 38 first
+.src_ref 2 "reduce_base_c8.h" 270 64 first
+.delay_slot
+ 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3889 "00000000" // /* MW 15 */
+ 3890 "00000000" // /* MW 14 */
+ 3891 "01111000" // /* MW 13 */
+ 3892 "10100101" // /* MW 12 */
+ 3893 "00000001" // /* MW 11 */
+ 3894 "01111100" // /* MW 10 */
+ 3895 "11100011" // /* MW 9 */
+ 3896 "10111101" // /* MW 8 */
+ 3897 "00010001" // /* MW 7 */
+ 3898 "00010110" // /* MW 6 */
+ 3899 "00100100" // /* MW 5 */
+ 3900 "00000000" // /* MW 4 */
+ 3901 "11110000" // /* MW 3 */
+ 3902 "00101100" // /* MW 2 */
+ 3903 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+.src_ref 2 "reduce_base_c8.h" 250 44
+.src_ref 2 "reduce_base_c8.h" 250 44 first
+.src_ref 2 "reduce_base_c8.h" 255 40
+ 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "01011000" // /* MW 9 */
+ 3906 "11101100" // /* MW 8 */
+ 3907 "00000111" // /* MW 7 */
+ 3908 "00001000" // /* MW 6 */
+ 3909 "01000010" // /* MW 5 */
+ 3910 "00000000" // /* MW 4 */
+ 3911 "11100000" // /* MW 3 */
+ 3912 "10010010" // /* MW 2 */
+ 3913 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113 first
+ 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "01011000" // /* MW 9 */
+ 3916 "00001000" // /* MW 8 */
+ 3917 "01001000" // /* MW 7 */
+ 3918 "01110000" // /* MW 6 */
+ 3919 "00101101" // /* MW 5 */
+ 3920 "00000110" // /* MW 4 */
+ 3921 "00100000" // /* MW 3 */
+ 3922 "10000110" // /* MW 2 */
+ 3923 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3925 "00010001" // /* MW 5 */
+ 3926 "00100000" // /* MW 4 */
+ 3927 "00101101" // /* MW 3 */
+ 3928 "11001000" // /* MW 2 */
+ 3929 "00000000" // /* MW 1 */
+ 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3931 "00000000" // /* MW 1 */
+ 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3933 "00000000" // /* MW 1 */
+ 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3935 "00000000" // /* MW 1 */
+ 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3937 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 251 38 first
+ 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3939 "01110111" // /* MW 3 */
+ 3940 "00011111" // /* MW 2 */
+ 3941 "00000100" // /* MW 1 */
+ 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3943 "00000000" // /* MW 1 */
+ 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3945 "00000000" // /* MW 1 */
+ 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3947 "00000000" // /* MW 1 */
+ 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3949 "00000000" // /* MW 1 */
+ 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3951 "00000000" // /* MW 1 */
+ 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 252 39 first
+ 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3955 "10110111" // /* MW 3 */
+ 3956 "00011100" // /* MW 2 */
+ 3957 "00000100" // /* MW 1 */
+ 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3959 "00000000" // /* MW 1 */
+ 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3961 "00000000" // /* MW 1 */
+ 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3963 "00000000" // /* MW 1 */
+ 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3965 "00000000" // /* MW 1 */
+ 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3967 "00000000" // /* MW 1 */
+ 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3969 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 253 38 first
+ 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3971 "01110111" // /* MW 3 */
+ 3972 "00011111" // /* MW 2 */
+ 3973 "00000100" // /* MW 1 */
+ 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3975 "00000000" // /* MW 1 */
+ 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3977 "00000000" // /* MW 1 */
+ 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3979 "00000000" // /* MW 1 */
+ 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3981 "00000000" // /* MW 1 */
+ 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3983 "00000000" // /* MW 1 */
+ 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3985 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 254 39 first
+ 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3987 "11010111" // /* MW 3 */
+ 3988 "00011110" // /* MW 2 */
+ 3989 "00000100" // /* MW 1 */
+ 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3991 "00000000" // /* MW 1 */
+ 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3993 "00000000" // /* MW 1 */
+ 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3995 "00000000" // /* MW 1 */
+ 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3997 "00000000" // /* MW 1 */
+ 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3999 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 40 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4003 "01110111" // /* MW 3 */
+ 4004 "00001000" // /* MW 2 */
+ 4005 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4007 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4009 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4011 "00000000" // /* MW 5 */
+ 4012 "00000000" // /* MW 4 */
+ 4013 "11101000" // /* MW 3 */
+ 4014 "00000110" // /* MW 2 */
+ 4015 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4017 "00100110" // /* MW 3 */
+ 4018 "01000110" // /* MW 2 */
+ 4019 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4023 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 256 38 first
+.delay_slot
+ 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4025 "11010001" // /* MW 3 */
+ 4026 "00000100" // /* MW 2 */
+ 4027 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 257 38 first
+.delay_slot
+ 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4029 "01010001" // /* MW 3 */
+ 4030 "00010110" // /* MW 2 */
+ 4031 "00001100" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+.src_ref 2 "reduce_base_c8.h" 238 44 first
+ 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4033 "10110111" // /* MW 3 */
+ 4034 "00011110" // /* MW 2 */
+ 4035 "00000100" // /* MW 1 */
+ 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4037 "00000000" // /* MW 1 */
+ 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4039 "00000000" // /* MW 1 */
+ 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4041 "00000000" // /* MW 1 */
+ 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4043 "00000000" // /* MW 1 */
+ 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4045 "00000000" // /* MW 1 */
+ 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4047 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 239 38 first
+ 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4049 "11110111" // /* MW 3 */
+ 4050 "00011100" // /* MW 2 */
+ 4051 "00000100" // /* MW 1 */
+ 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4053 "00000000" // /* MW 1 */
+ 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4055 "00000000" // /* MW 1 */
+ 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4057 "00000000" // /* MW 1 */
+ 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4059 "00000000" // /* MW 1 */
+ 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4061 "00000000" // /* MW 1 */
+ 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4063 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 240 39 first
+ 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4065 "11110111" // /* MW 3 */
+ 4066 "00011110" // /* MW 2 */
+ 4067 "00000100" // /* MW 1 */
+ 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4069 "00000000" // /* MW 1 */
+ 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4071 "00000000" // /* MW 1 */
+ 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4073 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+ 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4075 "01110001" // /* MW 3 */
+ 4076 "11111100" // /* MW 2 */
+ 4077 "00000111" // /* MW 1 */
+ 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4079 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4081 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 38 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4083 "00110111" // /* MW 3 */
+ 4084 "00011100" // /* MW 2 */
+ 4085 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4087 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4089 "11000000" // /* MW 5 */
+ 4090 "10111111" // /* MW 4 */
+ 4091 "11110000" // /* MW 3 */
+ 4092 "00000000" // /* MW 2 */
+ 4093 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4095 "10100000" // /* MW 3 */
+ 4096 "01000101" // /* MW 2 */
+ 4097 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4099 "01000001" // /* MW 5 */
+ 4100 "10100000" // /* MW 4 */
+ 4101 "11000000" // /* MW 3 */
+ 4102 "01000100" // /* MW 2 */
+ 4103 "00011000" // /* MW 1 */
+ 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4105 "00000000" // /* MW 1 */
+ 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4107 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 39 first
+ 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4109 "11010111" // /* MW 3 */
+ 4110 "00011110" // /* MW 2 */
+ 4111 "00000100" // /* MW 1 */
+ 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4113 "00000000" // /* MW 1 */
+ 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4115 "00000000" // /* MW 1 */
+ 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4117 "00000000" // /* MW 1 */
+ 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4119 "00000000" // /* MW 1 */
+ 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4121 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4123 "11011000" // /* MW 3 */
+ 4124 "00001111" // /* MW 2 */
+ 4125 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40 first
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4127 "10110111" // /* MW 3 */
+ 4128 "00001000" // /* MW 2 */
+ 4129 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4131 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4133 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4135 "00000000" // /* MW 5 */
+ 4136 "00000000" // /* MW 4 */
+ 4137 "11101000" // /* MW 3 */
+ 4138 "00000110" // /* MW 2 */
+ 4139 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 91
+.src_ref 2 "reduce_base_c8.h" 243 91
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4141 "01000001" // /* MW 5 */
+ 4142 "10100000" // /* MW 4 */
+ 4143 "11000010" // /* MW 3 */
+ 4144 "01001001" // /* MW 2 */
+ 4145 "10110001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 244 38 first
+.delay_slot
+ 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4151 "10010001" // /* MW 3 */
+ 4152 "00000110" // /* MW 2 */
+ 4153 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 245 38 first
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4155 "01111001" // /* MW 9 */
+ 4156 "10001110" // /* MW 8 */
+ 4157 "11010000" // /* MW 7 */
+ 4158 "10001011" // /* MW 6 */
+ 4159 "10100000" // /* MW 5 */
+ 4160 "00000001" // /* MW 4 */
+ 4161 "00110000" // /* MW 3 */
+ 4162 "11000110" // /* MW 2 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+ 4163 "10000010" // /* MW 1 */
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.src_ref 3 "pad_3d.h" 266 first
+.src_ref 3 "pad_3d.h" 465 37 first
+.src_ref 3 "pad_3d.h" 468 21 first
+.src_ref 3 "pad_3d.h" 471 29
+.src_ref 3 "pad_3d.h" 479 21
+.function_start
+ 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4177 "01011000" // /* MW 9 */
+ 4178 "11101000" // /* MW 8 */
+ 4179 "10000111" // /* MW 7 */
+ 4180 "11001000" // /* MW 6 */
+ 4181 "01000111" // /* MW 5 */
+ 4182 "00111110" // /* MW 4 */
+ 4183 "11010000" // /* MW 3 */
+ 4184 "10000010" // /* MW 2 */
+ 4185 "01000010" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 469 21 first
+.src_ref 3 "pad_3d.h" 478 21
+.src_ref 3 "pad_3d.h" 499 52
+.src_ref 3 "pad_3d.h" 511 25
+ 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4187 "01011000" // /* MW 9 */
+ 4188 "00000110" // /* MW 8 */
+ 4189 "00001000" // /* MW 7 */
+ 4190 "10101010" // /* MW 6 */
+ 4191 "00100111" // /* MW 5 */
+ 4192 "00111110" // /* MW 4 */
+ 4193 "11010000" // /* MW 3 */
+ 4194 "10000110" // /* MW 2 */
+ 4195 "01000101" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 470 21 first
+.src_ref 3 "pad_3d.h" 486 26
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 26
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22
+ 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4197 "01111000" // /* MW 9 */
+ 4198 "01100000" // /* MW 8 */
+ 4199 "01101000" // /* MW 7 */
+ 4200 "00001000" // /* MW 6 */
+ 4201 "10000000" // /* MW 5 */
+ 4202 "00000001" // /* MW 4 */
+ 4203 "11010000" // /* MW 3 */
+ 4204 "10010110" // /* MW 2 */
+ 4205 "01001111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 471 29 first
+ 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4207 "01010010" // /* MW 3 */
+ 4208 "00101010" // /* MW 2 */
+ 4209 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 472 25 first
+ 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4211 "11010110" // /* MW 3 */
+ 4212 "00011100" // /* MW 2 */
+ 4213 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 473 26 first
+ 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4215 "11110110" // /* MW 3 */
+ 4216 "00101100" // /* MW 2 */
+ 4217 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 475 24 first
+ 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4219 "00110110" // /* MW 3 */
+ 4220 "00000110" // /* MW 2 */
+ 4221 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 479 21 first
+ 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4223 "01001110" // /* MW 3 */
+ 4224 "00100110" // /* MW 2 */
+ 4225 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 477 23 first
+ 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4227 "10010110" // /* MW 3 */
+ 4228 "00100100" // /* MW 2 */
+ 4229 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 478 21 first
+ 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4231 "00101110" // /* MW 3 */
+ 4232 "01101000" // /* MW 2 */
+ 4233 "00010001" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 56 25 first
+ 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4235 "01110010" // /* MW 3 */
+ 4236 "01001001" // /* MW 2 */
+ 4237 "00011000" // /* MW 1 */
+ 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4239 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 45 first
+ 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4241 "01001111" // /* MW 3 */
+ 4242 "11100101" // /* MW 2 */
+ 4243 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 34
+ 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4245 "00010001" // /* MW 3 */
+ 4246 "01100111" // /* MW 2 */
+ 4247 "00010000" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 998 25 first
+ 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4249 "00101111" // /* MW 3 */
+ 4250 "11100111" // /* MW 2 */
+ 4251 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 43 first
+ 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4253 "00101111" // /* MW 3 */
+ 4254 "01100011" // /* MW 2 */
+ 4255 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13 first
+ 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4257 "00001101" // /* MW 3 */
+ 4258 "11100001" // /* MW 2 */
+ 4259 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 486 26 first
+ 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4261 "10000010" // /* MW 5 */
+ 4262 "11000011" // /* MW 4 */
+ 4263 "00110100" // /* MW 3 */
+ 4264 "00100011" // /* MW 2 */
+ 4265 "11000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */
+ 4267 "00000001" // /* MW 5 */
+ 4268 "01000000" // /* MW 4 */
+ 4269 "10100000" // /* MW 3 */
+ 4270 "00001000" // /* MW 2 */
+ 4271 "10000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 487 22
+.delay_slot
+ 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "10010010" // /* MW 3 */
+ 4274 "00000000" // /* MW 2 */
+ 4275 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4283 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4285 "01100000" // /* MW 5 */
+ 4286 "11100010" // /* MW 4 */
+ 4287 "00010001" // /* MW 3 */
+ 4288 "00000000" // /* MW 2 */
+ 4289 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4291 "01100000" // /* MW 5 */
+ 4292 "11100010" // /* MW 4 */
+ 4293 "00010110" // /* MW 3 */
+ 4294 "00000000" // /* MW 2 */
+ 4295 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4297 "01110000" // /* MW 7 */
+ 4298 "01010000" // /* MW 6 */
+ 4299 "10111100" // /* MW 5 */
+ 4300 "00000010" // /* MW 4 */
+ 4301 "01100000" // /* MW 3 */
+ 4302 "00101011" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+ 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "10100101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00100000" // /* MW 5 */
+ 4316 "00000000" // /* MW 4 */
+ 4317 "11110000" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "10100101" // /* MW 12 */
+ 4325 "00000001" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+ 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00100000" // /* MW 5 */
+ 4348 "00000000" // /* MW 4 */
+ 4349 "11110000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+ 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "01011011" // /* MW 7 */
+ 4362 "00000001" // /* MW 6 */
+ 4363 "00100000" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+ 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "10100101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "01011011" // /* MW 7 */
+ 4378 "00000001" // /* MW 6 */
+ 4379 "00100000" // /* MW 5 */
+ 4380 "00000000" // /* MW 4 */
+ 4381 "11110000" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+ 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "10100101" // /* MW 12 */
+ 4389 "00000001" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "01011011" // /* MW 7 */
+ 4394 "00000001" // /* MW 6 */
+ 4395 "00100000" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.src_ref 3 "pad_3d.h" 487 22 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4401 "00000000" // /* MW 15 */
+ 4402 "00000000" // /* MW 14 */
+ 4403 "01111000" // /* MW 13 */
+ 4404 "10100101" // /* MW 12 */
+ 4405 "00000001" // /* MW 11 */
+ 4406 "00000000" // /* MW 10 */
+ 4407 "00000000" // /* MW 9 */
+ 4408 "10000000" // /* MW 8 */
+ 4409 "00000110" // /* MW 7 */
+ 4410 "00011100" // /* MW 6 */
+ 4411 "00100010" // /* MW 5 */
+ 4412 "00000000" // /* MW 4 */
+ 4413 "11110000" // /* MW 3 */
+ 4414 "00101100" // /* MW 2 */
+ 4415 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.src_ref 3 "pad_3d.h" 495 21
+.src_ref 3 "pad_3d.h" 495 40 first
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 38 first
+.loop_nesting 0
+ 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4417 "10101000" // /* MW 9 */
+ 4418 "11001100" // /* MW 8 */
+ 4419 "00101001" // /* MW 7 */
+ 4420 "11111110" // /* MW 6 */
+ 4421 "00000000" // /* MW 5 */
+ 4422 "00001011" // /* MW 4 */
+ 4423 "00000000" // /* MW 3 */
+ 4424 "10000110" // /* MW 2 */
+ 4425 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 40
+.src_ref 3 "pad_3d.h" 496 29 first
+ 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4427 "11111111" // /* MW 5 */
+ 4428 "10000111" // /* MW 4 */
+ 4429 "00110010" // /* MW 3 */
+ 4430 "01100010" // /* MW 2 */
+ 4431 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 21 first
+ 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4433 "01101101" // /* MW 3 */
+ 4434 "01100010" // /* MW 2 */
+ 4435 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 58
+.src_ref 3 "pad_3d.h" 498 23 first
+ 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4437 "00010000" // /* MW 5 */
+ 4438 "00010001" // /* MW 4 */
+ 4439 "00110010" // /* MW 3 */
+ 4440 "01001110" // /* MW 2 */
+ 4441 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 45 first
+ 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4443 "00001111" // /* MW 3 */
+ 4444 "11100001" // /* MW 2 */
+ 4445 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10 first
+ 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4447 "01101101" // /* MW 3 */
+ 4448 "01001100" // /* MW 2 */
+ 4449 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 52 first
+ 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4451 "00110010" // /* MW 5 */
+ 4452 "11000011" // /* MW 4 */
+ 4453 "11010100" // /* MW 3 */
+ 4454 "10000101" // /* MW 2 */
+ 4455 "10000001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 26
+ 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4457 "01101001" // /* MW 3 */
+ 4458 "00001110" // /* MW 2 */
+ 4459 "00010110" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */
+ 4461 "00000001" // /* MW 5 */
+ 4462 "01000000" // /* MW 4 */
+ 4463 "00001000" // /* MW 3 */
+ 4464 "00001001" // /* MW 2 */
+ 4465 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4475 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4477 "00010000" // /* MW 9 */
+ 4478 "00000000" // /* MW 8 */
+ 4479 "01111001" // /* MW 7 */
+ 4480 "00000100" // /* MW 6 */
+ 4481 "00000000" // /* MW 5 */
+ 4482 "00000000" // /* MW 4 */
+ 4483 "10000000" // /* MW 3 */
+ 4484 "00000111" // /* MW 2 */
+ 4485 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4487 "00010000" // /* MW 9 */
+ 4488 "00000000" // /* MW 8 */
+ 4489 "10111001" // /* MW 7 */
+ 4490 "00000101" // /* MW 6 */
+ 4491 "00000000" // /* MW 5 */
+ 4492 "00000000" // /* MW 4 */
+ 4493 "10000000" // /* MW 3 */
+ 4494 "00000110" // /* MW 2 */
+ 4495 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4497 "00000000" // /* MW 15 */
+ 4498 "00000000" // /* MW 14 */
+ 4499 "01111000" // /* MW 13 */
+ 4500 "10010000" // /* MW 12 */
+ 4501 "10111001" // /* MW 11 */
+ 4502 "00000010" // /* MW 10 */
+ 4503 "00000000" // /* MW 9 */
+ 4504 "00000000" // /* MW 8 */
+ 4505 "01011011" // /* MW 7 */
+ 4506 "00000001" // /* MW 6 */
+ 4507 "00100000" // /* MW 5 */
+ 4508 "00000000" // /* MW 4 */
+ 4509 "11110000" // /* MW 3 */
+ 4510 "00101100" // /* MW 2 */
+ 4511 "00000000" // /* MW 1 */
+ 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4513 "00000000" // /* MW 15 */
+ 4514 "00000000" // /* MW 14 */
+ 4515 "01111000" // /* MW 13 */
+ 4516 "10100101" // /* MW 12 */
+ 4517 "00000001" // /* MW 11 */
+ 4518 "00000000" // /* MW 10 */
+ 4519 "00000000" // /* MW 9 */
+ 4520 "00000000" // /* MW 8 */
+ 4521 "01011011" // /* MW 7 */
+ 4522 "00000001" // /* MW 6 */
+ 4523 "00100000" // /* MW 5 */
+ 4524 "00000000" // /* MW 4 */
+ 4525 "11110000" // /* MW 3 */
+ 4526 "00101100" // /* MW 2 */
+ 4527 "00000000" // /* MW 1 */
+ 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4529 "00000000" // /* MW 15 */
+ 4530 "00000000" // /* MW 14 */
+ 4531 "01111000" // /* MW 13 */
+ 4532 "10100101" // /* MW 12 */
+ 4533 "00000001" // /* MW 11 */
+ 4534 "00000000" // /* MW 10 */
+ 4535 "00000000" // /* MW 9 */
+ 4536 "00000000" // /* MW 8 */
+ 4537 "01011011" // /* MW 7 */
+ 4538 "00000001" // /* MW 6 */
+ 4539 "00100000" // /* MW 5 */
+ 4540 "00000000" // /* MW 4 */
+ 4541 "11110000" // /* MW 3 */
+ 4542 "00101100" // /* MW 2 */
+ 4543 "00000000" // /* MW 1 */
+ 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4545 "00000000" // /* MW 15 */
+ 4546 "00000000" // /* MW 14 */
+ 4547 "01111000" // /* MW 13 */
+ 4548 "10100101" // /* MW 12 */
+ 4549 "00000001" // /* MW 11 */
+ 4550 "00000000" // /* MW 10 */
+ 4551 "00000000" // /* MW 9 */
+ 4552 "00000000" // /* MW 8 */
+ 4553 "01011011" // /* MW 7 */
+ 4554 "00000001" // /* MW 6 */
+ 4555 "00100000" // /* MW 5 */
+ 4556 "00000000" // /* MW 4 */
+ 4557 "11110000" // /* MW 3 */
+ 4558 "00101100" // /* MW 2 */
+ 4559 "00000000" // /* MW 1 */
+ 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4561 "00000000" // /* MW 15 */
+ 4562 "00000000" // /* MW 14 */
+ 4563 "01111000" // /* MW 13 */
+ 4564 "10100101" // /* MW 12 */
+ 4565 "00000001" // /* MW 11 */
+ 4566 "00000000" // /* MW 10 */
+ 4567 "00000000" // /* MW 9 */
+ 4568 "00000000" // /* MW 8 */
+ 4569 "01011011" // /* MW 7 */
+ 4570 "00000001" // /* MW 6 */
+ 4571 "00100000" // /* MW 5 */
+ 4572 "00000000" // /* MW 4 */
+ 4573 "11110000" // /* MW 3 */
+ 4574 "00101100" // /* MW 2 */
+ 4575 "00000000" // /* MW 1 */
+ 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4577 "00000000" // /* MW 15 */
+ 4578 "00000000" // /* MW 14 */
+ 4579 "01111000" // /* MW 13 */
+ 4580 "10100101" // /* MW 12 */
+ 4581 "00000001" // /* MW 11 */
+ 4582 "00000000" // /* MW 10 */
+ 4583 "00000000" // /* MW 9 */
+ 4584 "00000000" // /* MW 8 */
+ 4585 "01011011" // /* MW 7 */
+ 4586 "00000001" // /* MW 6 */
+ 4587 "00100000" // /* MW 5 */
+ 4588 "00000000" // /* MW 4 */
+ 4589 "11110000" // /* MW 3 */
+ 4590 "00101100" // /* MW 2 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4593 "00000000" // /* MW 15 */
+ 4594 "00000000" // /* MW 14 */
+ 4595 "01111000" // /* MW 13 */
+ 4596 "10100101" // /* MW 12 */
+ 4597 "00000001" // /* MW 11 */
+ 4598 "00000000" // /* MW 10 */
+ 4599 "00000000" // /* MW 9 */
+ 4600 "00000000" // /* MW 8 */
+ 4601 "01011011" // /* MW 7 */
+ 4602 "00000001" // /* MW 6 */
+ 4603 "00100000" // /* MW 5 */
+ 4604 "00000000" // /* MW 4 */
+ 4605 "11110000" // /* MW 3 */
+ 4606 "00101100" // /* MW 2 */
+ 4607 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4609 "00000000" // /* MW 15 */
+ 4610 "00000000" // /* MW 14 */
+ 4611 "01111000" // /* MW 13 */
+ 4612 "10100101" // /* MW 12 */
+ 4613 "00000001" // /* MW 11 */
+ 4614 "00000000" // /* MW 10 */
+ 4615 "00000000" // /* MW 9 */
+ 4616 "00000000" // /* MW 8 */
+ 4617 "00101110" // /* MW 7 */
+ 4618 "00110000" // /* MW 6 */
+ 4619 "00100010" // /* MW 5 */
+ 4620 "00000000" // /* MW 4 */
+ 4621 "11110000" // /* MW 3 */
+ 4622 "00101100" // /* MW 2 */
+ 4623 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.src_ref 3 "pad_3d.h" 514 39
+.loop_nesting 0
+ 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "11110000" // /* MW 5 */
+ 4626 "10111111" // /* MW 4 */
+ 4627 "11110011" // /* MW 3 */
+ 4628 "11111111" // /* MW 2 */
+ 4629 "01111111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 39 first
+ 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "01000100" // /* MW 3 */
+ 4632 "11001110" // /* MW 2 */
+ 4633 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 35
+ 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "01110001" // /* MW 3 */
+ 4636 "01001110" // /* MW 2 */
+ 4637 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+ 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4639 "00001111" // /* MW 3 */
+ 4640 "11001110" // /* MW 2 */
+ 4641 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 511 25 first
+ 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4643 "00101110" // /* MW 3 */
+ 4644 "00000100" // /* MW 2 */
+ 4645 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 36 first
+ 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4647 "01000001" // /* MW 3 */
+ 4648 "01001000" // /* MW 2 */
+ 4649 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 30 first
+ 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4651 "00001111" // /* MW 3 */
+ 4652 "10000100" // /* MW 2 */
+ 4653 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 28 first
+ 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4655 "00001111" // /* MW 3 */
+ 4656 "00000000" // /* MW 2 */
+ 4657 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 514 55
+.src_ref 3 "pad_3d.h" 517 39 first
+ 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4659 "00000101" // /* MW 5 */
+ 4660 "00100000" // /* MW 4 */
+ 4661 "11110011" // /* MW 3 */
+ 4662 "01000101" // /* MW 2 */
+ 4663 "00001000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21 first
+ 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4665 "01101101" // /* MW 3 */
+ 4666 "00000000" // /* MW 2 */
+ 4667 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22 first
+ 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4669 "00000010" // /* MW 5 */
+ 4670 "11000011" // /* MW 4 */
+ 4671 "00110100" // /* MW 3 */
+ 4672 "00000011" // /* MW 2 */
+ 4673 "11000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4675 "00000001" // /* MW 5 */
+ 4676 "01000000" // /* MW 4 */
+ 4677 "01110000" // /* MW 3 */
+ 4678 "00001001" // /* MW 2 */
+ 4679 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4681 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4683 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55 first
+.delay_slot
+ 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4685 "01101101" // /* MW 3 */
+ 4686 "11001000" // /* MW 2 */
+ 4687 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+.delay_slot
+ 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4689 "00001000" // /* MW 3 */
+ 4690 "00000010" // /* MW 2 */
+ 4691 "00011000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 37 first
+.delay_slot
+ 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4693 "01111111" // /* MW 3 */
+ 4694 "01000001" // /* MW 2 */
+ 4695 "00011000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4 first
+ 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4697 "00010000" // /* MW 9 */
+ 4698 "01101000" // /* MW 8 */
+ 4699 "01111001" // /* MW 7 */
+ 4700 "00000100" // /* MW 6 */
+ 4701 "00000000" // /* MW 5 */
+ 4702 "00000000" // /* MW 4 */
+ 4703 "10000000" // /* MW 3 */
+ 4704 "00000011" // /* MW 2 */
+ 4705 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4
+ 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4707 "00010000" // /* MW 9 */
+ 4708 "01101000" // /* MW 8 */
+ 4709 "10111001" // /* MW 7 */
+ 4710 "00000101" // /* MW 6 */
+ 4711 "00000000" // /* MW 5 */
+ 4712 "00000000" // /* MW 4 */
+ 4713 "10000000" // /* MW 3 */
+ 4714 "00000010" // /* MW 2 */
+ 4715 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4717 "10100000" // /* MW 3 */
+ 4718 "01110000" // /* MW 2 */
+ 4719 "00011101" // /* MW 1 */
+ 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4721 "00000000" // /* MW 15 */
+ 4722 "00000000" // /* MW 14 */
+ 4723 "01111000" // /* MW 13 */
+ 4724 "10100101" // /* MW 12 */
+ 4725 "00000001" // /* MW 11 */
+ 4726 "00000000" // /* MW 10 */
+ 4727 "00000000" // /* MW 9 */
+ 4728 "00000000" // /* MW 8 */
+ 4729 "01011011" // /* MW 7 */
+ 4730 "00000001" // /* MW 6 */
+ 4731 "00100000" // /* MW 5 */
+ 4732 "00000000" // /* MW 4 */
+ 4733 "11110000" // /* MW 3 */
+ 4734 "00101100" // /* MW 2 */
+ 4735 "00000000" // /* MW 1 */
+ 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4737 "00000000" // /* MW 15 */
+ 4738 "00000000" // /* MW 14 */
+ 4739 "01111000" // /* MW 13 */
+ 4740 "10100101" // /* MW 12 */
+ 4741 "00000001" // /* MW 11 */
+ 4742 "00000000" // /* MW 10 */
+ 4743 "00000000" // /* MW 9 */
+ 4744 "00000000" // /* MW 8 */
+ 4745 "01011011" // /* MW 7 */
+ 4746 "00000001" // /* MW 6 */
+ 4747 "00100000" // /* MW 5 */
+ 4748 "00000000" // /* MW 4 */
+ 4749 "11110000" // /* MW 3 */
+ 4750 "00101100" // /* MW 2 */
+ 4751 "00000000" // /* MW 1 */
+ 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4753 "00000000" // /* MW 15 */
+ 4754 "00000000" // /* MW 14 */
+ 4755 "01111000" // /* MW 13 */
+ 4756 "10100101" // /* MW 12 */
+ 4757 "00000001" // /* MW 11 */
+ 4758 "00000000" // /* MW 10 */
+ 4759 "00000000" // /* MW 9 */
+ 4760 "00000000" // /* MW 8 */
+ 4761 "01011011" // /* MW 7 */
+ 4762 "00000001" // /* MW 6 */
+ 4763 "00100000" // /* MW 5 */
+ 4764 "00000000" // /* MW 4 */
+ 4765 "11110000" // /* MW 3 */
+ 4766 "00101100" // /* MW 2 */
+ 4767 "00000000" // /* MW 1 */
+ 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4769 "00000000" // /* MW 15 */
+ 4770 "00000000" // /* MW 14 */
+ 4771 "01111000" // /* MW 13 */
+ 4772 "10100101" // /* MW 12 */
+ 4773 "00000001" // /* MW 11 */
+ 4774 "00000000" // /* MW 10 */
+ 4775 "00000000" // /* MW 9 */
+ 4776 "00000000" // /* MW 8 */
+ 4777 "01011011" // /* MW 7 */
+ 4778 "00000001" // /* MW 6 */
+ 4779 "00100000" // /* MW 5 */
+ 4780 "00000000" // /* MW 4 */
+ 4781 "11110000" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+ 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4785 "00000000" // /* MW 15 */
+ 4786 "00000000" // /* MW 14 */
+ 4787 "01111000" // /* MW 13 */
+ 4788 "10100101" // /* MW 12 */
+ 4789 "00000001" // /* MW 11 */
+ 4790 "00000000" // /* MW 10 */
+ 4791 "00000000" // /* MW 9 */
+ 4792 "00000000" // /* MW 8 */
+ 4793 "01011011" // /* MW 7 */
+ 4794 "00000001" // /* MW 6 */
+ 4795 "00100000" // /* MW 5 */
+ 4796 "00000000" // /* MW 4 */
+ 4797 "11110000" // /* MW 3 */
+ 4798 "00101100" // /* MW 2 */
+ 4799 "00000000" // /* MW 1 */
+ 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4801 "00000000" // /* MW 15 */
+ 4802 "00000000" // /* MW 14 */
+ 4803 "01111000" // /* MW 13 */
+ 4804 "10100101" // /* MW 12 */
+ 4805 "00000001" // /* MW 11 */
+ 4806 "00000000" // /* MW 10 */
+ 4807 "00000000" // /* MW 9 */
+ 4808 "00000000" // /* MW 8 */
+ 4809 "01011011" // /* MW 7 */
+ 4810 "00000001" // /* MW 6 */
+ 4811 "00100000" // /* MW 5 */
+ 4812 "00000000" // /* MW 4 */
+ 4813 "11110000" // /* MW 3 */
+ 4814 "00101100" // /* MW 2 */
+ 4815 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4817 "00000000" // /* MW 15 */
+ 4818 "00000000" // /* MW 14 */
+ 4819 "01111000" // /* MW 13 */
+ 4820 "10100101" // /* MW 12 */
+ 4821 "00000001" // /* MW 11 */
+ 4822 "00000000" // /* MW 10 */
+ 4823 "00000000" // /* MW 9 */
+ 4824 "00000000" // /* MW 8 */
+ 4825 "00101110" // /* MW 7 */
+ 4826 "00010000" // /* MW 6 */
+ 4827 "00100010" // /* MW 5 */
+ 4828 "00000000" // /* MW 4 */
+ 4829 "11110000" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.src_ref 3 "pad_3d.h" 282 first
+.loop_nesting 0
+ 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4833 "00000000" // /* MW 3 */
+ 4834 "00101000" // /* MW 2 */
+ 4835 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4843 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+ 4845 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30
+.src_ref 2 "reduce_base_c8.h" 362 first
+.src_ref 2 "reduce_base_c8.h" 365 18
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+.function_start
+ 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4849 "11000000" // /* MW 3 */
+ 4850 "11010100" // /* MW 2 */
+ 4851 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 365 18 first
+ 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4853 "00000000" // /* MW 7 */
+ 4854 "11001011" // /* MW 6 */
+ 4855 "10110000" // /* MW 5 */
+ 4856 "00000011" // /* MW 4 */
+ 4857 "01100000" // /* MW 3 */
+ 4858 "10010001" // /* MW 2 */
+ 4859 "01101011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 19 first
+ 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4861 "00011010" // /* MW 3 */
+ 4862 "10001100" // /* MW 2 */
+ 4863 "00000111" // /* MW 1 */
+ 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4865 "00000000" // /* MW 1 */
+ 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4867 "00000000" // /* MW 1 */
+ 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4869 "00000000" // /* MW 1 */
+ 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4871 "00000000" // /* MW 1 */
+ 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4873 "00000000" // /* MW 1 */
+ 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4875 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 12
+.src_ref 2 "reduce_base_c8.h" 367 19
+ 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4877 "00000001" // /* MW 5 */
+ 4878 "01000000" // /* MW 4 */
+ 4879 "11110000" // /* MW 3 */
+ 4880 "00001001" // /* MW 2 */
+ 4881 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18
+.src_ref 5 "broadcast.hpp" 80 25
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 2 "reduce_base_c8.h" 372 34
+.delay_slot
+ 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4883 "00000001" // /* MW 3 */
+ 4884 "00100000" // /* MW 2 */
+ 4885 "00010000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.delay_slot
+ 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4887 "01110010" // /* MW 3 */
+ 4888 "11000010" // /* MW 2 */
+ 4889 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 362
+.delay_slot
+ 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4895 "00000001" // /* MW 5 */
+ 4896 "00000000" // /* MW 4 */
+ 4897 "00000000" // /* MW 3 */
+ 4898 "00100000" // /* MW 2 */
+ 4899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43
+ 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4901 "01001000" // /* MW 3 */
+ 4902 "10000000" // /* MW 2 */
+ 4903 "00011010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43 first
+ 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00110110" // /* MW 3 */
+ 4906 "01000000" // /* MW 2 */
+ 4907 "00000010" // /* MW 1 */
+ 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4909 "00000000" // /* MW 1 */
+ 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4911 "00000000" // /* MW 1 */
+ 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4913 "00000000" // /* MW 1 */
+ 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4915 "00000000" // /* MW 1 */
+ 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4917 "00000000" // /* MW 1 */
+ 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4919 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 34
+ 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4921 "00011001" // /* MW 3 */
+ 4922 "00000100" // /* MW 2 */
+ 4923 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4925 "00000001" // /* MW 5 */
+ 4926 "01000000" // /* MW 4 */
+ 4927 "11110000" // /* MW 3 */
+ 4928 "00001001" // /* MW 2 */
+ 4929 "00010000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 374 29
+.delay_slot
+ 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4931 "10010010" // /* MW 3 */
+ 4932 "00000010" // /* MW 2 */
+ 4933 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4941 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 372 12
+.src_ref 2 "reduce_base_c8.h" 374 29
+ 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4943 "00010000" // /* MW 11 */
+ 4944 "11101000" // /* MW 10 */
+ 4945 "01111001" // /* MW 9 */
+ 4946 "00000100" // /* MW 8 */
+ 4947 "00000000" // /* MW 7 */
+ 4948 "00000000" // /* MW 6 */
+ 4949 "10001011" // /* MW 5 */
+ 4950 "10000100" // /* MW 4 */
+ 4951 "11110011" // /* MW 3 */
+ 4952 "00101100" // /* MW 2 */
+ 4953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4955 "10100000" // /* MW 5 */
+ 4956 "11100111" // /* MW 4 */
+ 4957 "00010110" // /* MW 3 */
+ 4958 "00000000" // /* MW 2 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4961 "00000000" // /* MW 15 */
+ 4962 "00000000" // /* MW 14 */
+ 4963 "01111000" // /* MW 13 */
+ 4964 "01010000" // /* MW 12 */
+ 4965 "10111000" // /* MW 11 */
+ 4966 "00000010" // /* MW 10 */
+ 4967 "00000000" // /* MW 9 */
+ 4968 "00000000" // /* MW 8 */
+ 4969 "01011011" // /* MW 7 */
+ 4970 "00000001" // /* MW 6 */
+ 4971 "00100000" // /* MW 5 */
+ 4972 "00000000" // /* MW 4 */
+ 4973 "11110000" // /* MW 3 */
+ 4974 "00101100" // /* MW 2 */
+ 4975 "00000000" // /* MW 1 */
+ 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4977 "00000000" // /* MW 15 */
+ 4978 "00000000" // /* MW 14 */
+ 4979 "01111000" // /* MW 13 */
+ 4980 "10100101" // /* MW 12 */
+ 4981 "00000001" // /* MW 11 */
+ 4982 "00000000" // /* MW 10 */
+ 4983 "00000000" // /* MW 9 */
+ 4984 "00000000" // /* MW 8 */
+ 4985 "01011011" // /* MW 7 */
+ 4986 "00000001" // /* MW 6 */
+ 4987 "00100000" // /* MW 5 */
+ 4988 "00000000" // /* MW 4 */
+ 4989 "11110000" // /* MW 3 */
+ 4990 "00101100" // /* MW 2 */
+ 4991 "00000000" // /* MW 1 */
+ 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4993 "00000000" // /* MW 15 */
+ 4994 "00000000" // /* MW 14 */
+ 4995 "01111000" // /* MW 13 */
+ 4996 "10100101" // /* MW 12 */
+ 4997 "00000001" // /* MW 11 */
+ 4998 "00000000" // /* MW 10 */
+ 4999 "00000000" // /* MW 9 */
+ 5000 "00000000" // /* MW 8 */
+ 5001 "01011011" // /* MW 7 */
+ 5002 "00000001" // /* MW 6 */
+ 5003 "00100000" // /* MW 5 */
+ 5004 "00000000" // /* MW 4 */
+ 5005 "11110000" // /* MW 3 */
+ 5006 "00101100" // /* MW 2 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5009 "00000000" // /* MW 15 */
+ 5010 "00000000" // /* MW 14 */
+ 5011 "01111000" // /* MW 13 */
+ 5012 "10100101" // /* MW 12 */
+ 5013 "00000001" // /* MW 11 */
+ 5014 "00000000" // /* MW 10 */
+ 5015 "00000000" // /* MW 9 */
+ 5016 "00000000" // /* MW 8 */
+ 5017 "01011011" // /* MW 7 */
+ 5018 "00000001" // /* MW 6 */
+ 5019 "00100000" // /* MW 5 */
+ 5020 "00000000" // /* MW 4 */
+ 5021 "11110000" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+ 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5025 "00000000" // /* MW 15 */
+ 5026 "00000000" // /* MW 14 */
+ 5027 "01111000" // /* MW 13 */
+ 5028 "10100101" // /* MW 12 */
+ 5029 "00000001" // /* MW 11 */
+ 5030 "00000000" // /* MW 10 */
+ 5031 "00000000" // /* MW 9 */
+ 5032 "00000000" // /* MW 8 */
+ 5033 "01011011" // /* MW 7 */
+ 5034 "00000001" // /* MW 6 */
+ 5035 "00100000" // /* MW 5 */
+ 5036 "00000000" // /* MW 4 */
+ 5037 "11110000" // /* MW 3 */
+ 5038 "00101100" // /* MW 2 */
+ 5039 "00000000" // /* MW 1 */
+ 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5041 "00000000" // /* MW 15 */
+ 5042 "00000000" // /* MW 14 */
+ 5043 "01111000" // /* MW 13 */
+ 5044 "10100101" // /* MW 12 */
+ 5045 "00000001" // /* MW 11 */
+ 5046 "00000000" // /* MW 10 */
+ 5047 "00000000" // /* MW 9 */
+ 5048 "00000000" // /* MW 8 */
+ 5049 "01011011" // /* MW 7 */
+ 5050 "00000001" // /* MW 6 */
+ 5051 "00100000" // /* MW 5 */
+ 5052 "00000000" // /* MW 4 */
+ 5053 "11110000" // /* MW 3 */
+ 5054 "00101100" // /* MW 2 */
+ 5055 "00000000" // /* MW 1 */
+ 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5057 "00000000" // /* MW 15 */
+ 5058 "00000000" // /* MW 14 */
+ 5059 "01111000" // /* MW 13 */
+ 5060 "10100101" // /* MW 12 */
+ 5061 "00000001" // /* MW 11 */
+ 5062 "00000000" // /* MW 10 */
+ 5063 "00000000" // /* MW 9 */
+ 5064 "00000000" // /* MW 8 */
+ 5065 "01011011" // /* MW 7 */
+ 5066 "00000001" // /* MW 6 */
+ 5067 "00100000" // /* MW 5 */
+ 5068 "00000000" // /* MW 4 */
+ 5069 "11110000" // /* MW 3 */
+ 5070 "00101100" // /* MW 2 */
+ 5071 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 2 "reduce_base_c8.h" 374 29 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5073 "00000000" // /* MW 15 */
+ 5074 "00000000" // /* MW 14 */
+ 5075 "01111000" // /* MW 13 */
+ 5076 "10100101" // /* MW 12 */
+ 5077 "00000001" // /* MW 11 */
+ 5078 "00000000" // /* MW 10 */
+ 5079 "00000000" // /* MW 9 */
+ 5080 "10000000" // /* MW 8 */
+ 5081 "00000110" // /* MW 7 */
+ 5082 "00011101" // /* MW 6 */
+ 5083 "00100011" // /* MW 5 */
+ 5084 "00000000" // /* MW 4 */
+ 5085 "11110000" // /* MW 3 */
+ 5086 "00101100" // /* MW 2 */
+ 5087 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.loop_nesting 0
+ 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5089 "01000000" // /* MW 3 */
+ 5090 "00000000" // /* MW 2 */
+ 5091 "00011100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+ 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5093 "00111010" // /* MW 3 */
+ 5094 "10001010" // /* MW 2 */
+ 5095 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 388 28
+ 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5097 "00011001" // /* MW 5 */
+ 5098 "00011111" // /* MW 4 */
+ 5099 "01011010" // /* MW 3 */
+ 5100 "11011010" // /* MW 2 */
+ 5101 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+ 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5103 "10111001" // /* MW 5 */
+ 5104 "00000000" // /* MW 4 */
+ 5105 "01010001" // /* MW 3 */
+ 5106 "01101011" // /* MW 2 */
+ 5107 "11110101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5109 "10000001" // /* MW 5 */
+ 5110 "10111101" // /* MW 4 */
+ 5111 "01011001" // /* MW 3 */
+ 5112 "01010010" // /* MW 2 */
+ 5113 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 594 43 first
+ 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5115 "00111000" // /* MW 5 */
+ 5116 "11010011" // /* MW 4 */
+ 5117 "01010110" // /* MW 3 */
+ 5118 "01001110" // /* MW 2 */
+ 5119 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5121 "10110010" // /* MW 3 */
+ 5122 "11011110" // /* MW 2 */
+ 5123 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 64
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5125 "10011010" // /* MW 3 */
+ 5126 "11111111" // /* MW 2 */
+ 5127 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 56 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5129 "00010010" // /* MW 5 */
+ 5130 "00011100" // /* MW 4 */
+ 5131 "01010000" // /* MW 3 */
+ 5132 "11000110" // /* MW 2 */
+ 5133 "01100111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 596 56 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5135 "01011000" // /* MW 9 */
+ 5136 "00000000" // /* MW 8 */
+ 5137 "01100000" // /* MW 7 */
+ 5138 "11001010" // /* MW 6 */
+ 5139 "00100111" // /* MW 5 */
+ 5140 "00111111" // /* MW 4 */
+ 5141 "01010000" // /* MW 3 */
+ 5142 "11001010" // /* MW 2 */
+ 5143 "01111110" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5145 "01011000" // /* MW 11 */
+ 5146 "00000001" // /* MW 10 */
+ 5147 "11001000" // /* MW 9 */
+ 5148 "01101100" // /* MW 8 */
+ 5149 "00101001" // /* MW 7 */
+ 5150 "00100011" // /* MW 6 */
+ 5151 "01001011" // /* MW 5 */
+ 5152 "00010000" // /* MW 4 */
+ 5153 "01010010" // /* MW 3 */
+ 5154 "00011110" // /* MW 2 */
+ 5155 "11100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 75 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5157 "01011000" // /* MW 11 */
+ 5158 "00111100" // /* MW 10 */
+ 5159 "01001000" // /* MW 9 */
+ 5160 "11101100" // /* MW 8 */
+ 5161 "01110011" // /* MW 7 */
+ 5162 "00101100" // /* MW 6 */
+ 5163 "00001011" // /* MW 5 */
+ 5164 "01011010" // /* MW 4 */
+ 5165 "01010010" // /* MW 3 */
+ 5166 "11101111" // /* MW 2 */
+ 5167 "01100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5169 "01111000" // /* MW 11 */
+ 5170 "11010000" // /* MW 10 */
+ 5171 "00000001" // /* MW 9 */
+ 5172 "01101101" // /* MW 8 */
+ 5173 "01000011" // /* MW 7 */
+ 5174 "00101001" // /* MW 6 */
+ 5175 "10001011" // /* MW 5 */
+ 5176 "10000100" // /* MW 4 */
+ 5177 "10000011" // /* MW 3 */
+ 5178 "00001010" // /* MW 2 */
+ 5179 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5181 "01111000" // /* MW 9 */
+ 5182 "00010000" // /* MW 8 */
+ 5183 "10000101" // /* MW 7 */
+ 5184 "01101110" // /* MW 6 */
+ 5185 "00110011" // /* MW 5 */
+ 5186 "00100111" // /* MW 4 */
+ 5187 "10110000" // /* MW 3 */
+ 5188 "00010010" // /* MW 2 */
+ 5189 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5191 "01111000" // /* MW 9 */
+ 5192 "11010000" // /* MW 8 */
+ 5193 "00000100" // /* MW 7 */
+ 5194 "01101111" // /* MW 6 */
+ 5195 "00110011" // /* MW 5 */
+ 5196 "00101011" // /* MW 4 */
+ 5197 "00110000" // /* MW 3 */
+ 5198 "01000001" // /* MW 2 */
+ 5199 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5201 "11111110" // /* MW 5 */
+ 5202 "11110010" // /* MW 4 */
+ 5203 "10111010" // /* MW 3 */
+ 5204 "01001101" // /* MW 2 */
+ 5205 "10001100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5207 "01000001" // /* MW 5 */
+ 5208 "00010001" // /* MW 4 */
+ 5209 "10110001" // /* MW 3 */
+ 5210 "01001101" // /* MW 2 */
+ 5211 "10010100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5213 "01111000" // /* MW 11 */
+ 5214 "11010000" // /* MW 10 */
+ 5215 "00000100" // /* MW 9 */
+ 5216 "01101100" // /* MW 8 */
+ 5217 "01100011" // /* MW 7 */
+ 5218 "00001110" // /* MW 6 */
+ 5219 "01001011" // /* MW 5 */
+ 5220 "00010000" // /* MW 4 */
+ 5221 "00110000" // /* MW 3 */
+ 5222 "00000001" // /* MW 2 */
+ 5223 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+ 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5225 "00111101" // /* MW 9 */
+ 5226 "00110000" // /* MW 8 */
+ 5227 "00010100" // /* MW 7 */
+ 5228 "11100100" // /* MW 6 */
+ 5229 "00100000" // /* MW 5 */
+ 5230 "00000011" // /* MW 4 */
+ 5231 "01100111" // /* MW 3 */
+ 5232 "10000001" // /* MW 2 */
+ 5233 "00001011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5235 "01110010" // /* MW 9 */
+ 5236 "01010000" // /* MW 8 */
+ 5237 "01000100" // /* MW 7 */
+ 5238 "00000010" // /* MW 6 */
+ 5239 "00001011" // /* MW 5 */
+ 5240 "01011011" // /* MW 4 */
+ 5241 "00110100" // /* MW 3 */
+ 5242 "00100001" // /* MW 2 */
+ 5243 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5245 "00000001" // /* MW 5 */
+ 5246 "10010011" // /* MW 4 */
+ 5247 "00110011" // /* MW 3 */
+ 5248 "00110001" // /* MW 2 */
+ 5249 "00000011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5251 "00111101" // /* MW 7 */
+ 5252 "10000000" // /* MW 6 */
+ 5253 "00010001" // /* MW 5 */
+ 5254 "00000100" // /* MW 4 */
+ 5255 "00110000" // /* MW 3 */
+ 5256 "01000001" // /* MW 2 */
+ 5257 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5259 "10010101" // /* MW 3 */
+ 5260 "01010000" // /* MW 2 */
+ 5261 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5265 "00111101" // /* MW 9 */
+ 5266 "00101000" // /* MW 8 */
+ 5267 "00010000" // /* MW 7 */
+ 5268 "00000010" // /* MW 6 */
+ 5269 "01001100" // /* MW 5 */
+ 5270 "10001111" // /* MW 4 */
+ 5271 "00000000" // /* MW 3 */
+ 5272 "00000000" // /* MW 2 */
+ 5273 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5275 "00000001" // /* MW 5 */
+ 5276 "00010000" // /* MW 4 */
+ 5277 "00110111" // /* MW 3 */
+ 5278 "00000001" // /* MW 2 */
+ 5279 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5281 "10000001" // /* MW 15 */
+ 5282 "10100001" // /* MW 14 */
+ 5283 "01111000" // /* MW 13 */
+ 5284 "00000000" // /* MW 12 */
+ 5285 "10000010" // /* MW 11 */
+ 5286 "00001000" // /* MW 10 */
+ 5287 "01000100" // /* MW 9 */
+ 5288 "00000000" // /* MW 8 */
+ 5289 "10001011" // /* MW 7 */
+ 5290 "10000100" // /* MW 6 */
+ 5291 "00100100" // /* MW 5 */
+ 5292 "00000000" // /* MW 4 */
+ 5293 "10000000" // /* MW 3 */
+ 5294 "00000110" // /* MW 2 */
+ 5295 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5297 "01100001" // /* MW 15 */
+ 5298 "10010000" // /* MW 14 */
+ 5299 "00010000" // /* MW 13 */
+ 5300 "10010000" // /* MW 12 */
+ 5301 "10111010" // /* MW 11 */
+ 5302 "00000101" // /* MW 10 */
+ 5303 "00000000" // /* MW 9 */
+ 5304 "00000000" // /* MW 8 */
+ 5305 "00001011" // /* MW 7 */
+ 5306 "01011010" // /* MW 6 */
+ 5307 "00100001" // /* MW 5 */
+ 5308 "00000000" // /* MW 4 */
+ 5309 "00110000" // /* MW 3 */
+ 5310 "00100001" // /* MW 2 */
+ 5311 "00011101" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5313 "10001001" // /* MW 3 */
+ 5314 "00011001" // /* MW 2 */
+ 5315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5317 "00111101" // /* MW 11 */
+ 5318 "10000000" // /* MW 10 */
+ 5319 "00010001" // /* MW 9 */
+ 5320 "10001110" // /* MW 8 */
+ 5321 "10101101" // /* MW 7 */
+ 5322 "00000000" // /* MW 6 */
+ 5323 "00100000" // /* MW 5 */
+ 5324 "00000000" // /* MW 4 */
+ 5325 "10110000" // /* MW 3 */
+ 5326 "00010010" // /* MW 2 */
+ 5327 "01101010" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5329 "00000000" // /* MW 15 */
+ 5330 "00000000" // /* MW 14 */
+ 5331 "01111000" // /* MW 13 */
+ 5332 "10100101" // /* MW 12 */
+ 5333 "00000001" // /* MW 11 */
+ 5334 "00000000" // /* MW 10 */
+ 5335 "00000000" // /* MW 9 */
+ 5336 "00000000" // /* MW 8 */
+ 5337 "01011011" // /* MW 7 */
+ 5338 "00000001" // /* MW 6 */
+ 5339 "00100000" // /* MW 5 */
+ 5340 "00000000" // /* MW 4 */
+ 5341 "00110000" // /* MW 3 */
+ 5342 "01000001" // /* MW 2 */
+ 5343 "00010101" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5345 "00000000" // /* MW 15 */
+ 5346 "00000000" // /* MW 14 */
+ 5347 "01111000" // /* MW 13 */
+ 5348 "10100101" // /* MW 12 */
+ 5349 "00000001" // /* MW 11 */
+ 5350 "00000000" // /* MW 10 */
+ 5351 "00000000" // /* MW 9 */
+ 5352 "00000000" // /* MW 8 */
+ 5353 "01011011" // /* MW 7 */
+ 5354 "00000001" // /* MW 6 */
+ 5355 "00100000" // /* MW 5 */
+ 5356 "00000000" // /* MW 4 */
+ 5357 "11110000" // /* MW 3 */
+ 5358 "00101100" // /* MW 2 */
+ 5359 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5361 "01000001" // /* MW 15 */
+ 5362 "10000001" // /* MW 14 */
+ 5363 "01111000" // /* MW 13 */
+ 5364 "10100101" // /* MW 12 */
+ 5365 "00000001" // /* MW 11 */
+ 5366 "00000000" // /* MW 10 */
+ 5367 "00000000" // /* MW 9 */
+ 5368 "00000000" // /* MW 8 */
+ 5369 "01011011" // /* MW 7 */
+ 5370 "00000001" // /* MW 6 */
+ 5371 "00100000" // /* MW 5 */
+ 5372 "00000000" // /* MW 4 */
+ 5373 "11110000" // /* MW 3 */
+ 5374 "00101100" // /* MW 2 */
+ 5375 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5377 "00000000" // /* MW 15 */
+ 5378 "00000000" // /* MW 14 */
+ 5379 "01111000" // /* MW 13 */
+ 5380 "10100101" // /* MW 12 */
+ 5381 "00000001" // /* MW 11 */
+ 5382 "00000000" // /* MW 10 */
+ 5383 "00000000" // /* MW 9 */
+ 5384 "10000000" // /* MW 8 */
+ 5385 "00000110" // /* MW 7 */
+ 5386 "00110001" // /* MW 6 */
+ 5387 "00100100" // /* MW 5 */
+ 5388 "00000000" // /* MW 4 */
+ 5389 "00110000" // /* MW 3 */
+ 5390 "00000001" // /* MW 2 */
+ 5391 "00011001" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5393 "10000001" // /* MW 15 */
+ 5394 "10100001" // /* MW 14 */
+ 5395 "01111000" // /* MW 13 */
+ 5396 "10100101" // /* MW 12 */
+ 5397 "00000001" // /* MW 11 */
+ 5398 "00000000" // /* MW 10 */
+ 5399 "00000000" // /* MW 9 */
+ 5400 "00000000" // /* MW 8 */
+ 5401 "01011011" // /* MW 7 */
+ 5402 "00000001" // /* MW 6 */
+ 5403 "00100000" // /* MW 5 */
+ 5404 "00000000" // /* MW 4 */
+ 5405 "11110000" // /* MW 3 */
+ 5406 "00101100" // /* MW 2 */
+ 5407 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5409 "01100001" // /* MW 15 */
+ 5410 "10010000" // /* MW 14 */
+ 5411 "01111000" // /* MW 13 */
+ 5412 "10100101" // /* MW 12 */
+ 5413 "00000001" // /* MW 11 */
+ 5414 "00000000" // /* MW 10 */
+ 5415 "00000000" // /* MW 9 */
+ 5416 "00000000" // /* MW 8 */
+ 5417 "01011011" // /* MW 7 */
+ 5418 "00000001" // /* MW 6 */
+ 5419 "00100000" // /* MW 5 */
+ 5420 "00000000" // /* MW 4 */
+ 5421 "00110000" // /* MW 3 */
+ 5422 "00100001" // /* MW 2 */
+ 5423 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 107 23
+.src_ref 2 "reduce_base_c8.h" 412 41 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5425 "00010000" // /* MW 9 */
+ 5426 "11000000" // /* MW 8 */
+ 5427 "10101111" // /* MW 7 */
+ 5428 "00001100" // /* MW 6 */
+ 5429 "00000000" // /* MW 5 */
+ 5430 "00000000" // /* MW 4 */
+ 5431 "01010000" // /* MW 3 */
+ 5432 "00000111" // /* MW 2 */
+ 5433 "11101100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 6 "me_vmult_float_emulated.h" 107 23 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5435 "00111101" // /* MW 9 */
+ 5436 "10000000" // /* MW 8 */
+ 5437 "00010001" // /* MW 7 */
+ 5438 "11100010" // /* MW 6 */
+ 5439 "01110010" // /* MW 5 */
+ 5440 "00010101" // /* MW 4 */
+ 5441 "00110010" // /* MW 3 */
+ 5442 "00110001" // /* MW 2 */
+ 5443 "00000011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5445 "01110010" // /* MW 3 */
+ 5446 "01000001" // /* MW 2 */
+ 5447 "00011000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5449 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5451 "00111101" // /* MW 3 */
+ 5452 "00101000" // /* MW 2 */
+ 5453 "00010000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5455 "00000110" // /* MW 3 */
+ 5456 "00110001" // /* MW 2 */
+ 5457 "00001100" // /* MW 1 */
+ 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5459 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 412 52 first
+ 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5461 "00111101" // /* MW 7 */
+ 5462 "00001100" // /* MW 6 */
+ 5463 "00010010" // /* MW 5 */
+ 5464 "11111001" // /* MW 4 */
+ 5465 "01011111" // /* MW 3 */
+ 5466 "00000010" // /* MW 2 */
+ 5467 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 31
+ 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5469 "00001000" // /* MW 3 */
+ 5470 "01000000" // /* MW 2 */
+ 5471 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 16
+ 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */
+ 5473 "00000001" // /* MW 5 */
+ 5474 "01000000" // /* MW 4 */
+ 5475 "01110000" // /* MW 3 */
+ 5476 "00001100" // /* MW 2 */
+ 5477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5483 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.delay_slot
+ 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5485 "00000110" // /* MW 3 */
+ 5486 "00110001" // /* MW 2 */
+ 5487 "00001100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5489 "00000000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5491 "00011010" // /* MW 5 */
+ 5492 "00010100" // /* MW 4 */
+ 5493 "11010000" // /* MW 3 */
+ 5494 "10011010" // /* MW 2 */
+ 5495 "01000110" // /* MW 1 */
+ 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5497 "00000000" // /* MW 1 */
+ 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5499 "00000000" // /* MW 1 */
+ 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5501 "00000000" // /* MW 1 */
+ 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5503 "00000000" // /* MW 1 */
+ 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5505 "00000000" // /* MW 1 */
+ 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5507 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5509 "01101001" // /* MW 3 */
+ 5510 "01001110" // /* MW 2 */
+ 5511 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */
+ 5513 "00000001" // /* MW 5 */
+ 5514 "01000000" // /* MW 4 */
+ 5515 "01000000" // /* MW 3 */
+ 5516 "00001110" // /* MW 2 */
+ 5517 "00111000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5519 "00010001" // /* MW 3 */
+ 5520 "00000000" // /* MW 2 */
+ 5521 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5529 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5531 "00001000" // /* MW 3 */
+ 5532 "10001010" // /* MW 2 */
+ 5533 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */
+ 5535 "00000001" // /* MW 5 */
+ 5536 "01000000" // /* MW 4 */
+ 5537 "10111000" // /* MW 3 */
+ 5538 "00001100" // /* MW 2 */
+ 5539 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5549 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5551 "11000001" // /* MW 5 */
+ 5552 "10000011" // /* MW 4 */
+ 5553 "10101001" // /* MW 3 */
+ 5554 "01000000" // /* MW 2 */
+ 5555 "00100100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5557 "11111110" // /* MW 5 */
+ 5558 "10111111" // /* MW 4 */
+ 5559 "11111010" // /* MW 3 */
+ 5560 "00000000" // /* MW 2 */
+ 5561 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+ 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5563 "00000010" // /* MW 5 */
+ 5564 "01010000" // /* MW 4 */
+ 5565 "11110000" // /* MW 3 */
+ 5566 "00101100" // /* MW 2 */
+ 5567 "00000000" // /* MW 1 */
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5569 "01011000" // /* MW 11 */
+ 5570 "00111100" // /* MW 10 */
+ 5571 "01001000" // /* MW 9 */
+ 5572 "00001000" // /* MW 8 */
+ 5573 "01010010" // /* MW 7 */
+ 5574 "00000000" // /* MW 6 */
+ 5575 "00001011" // /* MW 5 */
+ 5576 "10000011" // /* MW 4 */
+ 5577 "10000010" // /* MW 3 */
+ 5578 "00001010" // /* MW 2 */
+ 5579 "00001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+ 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5581 "00000010" // /* MW 5 */
+ 5582 "00010001" // /* MW 4 */
+ 5583 "01010000" // /* MW 3 */
+ 5584 "00011010" // /* MW 2 */
+ 5585 "01001000" // /* MW 1 */
+ 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5587 "00000000" // /* MW 1 */
+ 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5589 "00000000" // /* MW 1 */
+ 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5591 "00000000" // /* MW 1 */
+ 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5593 "00000000" // /* MW 1 */
+ 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5595 "00000000" // /* MW 1 */
+ 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5597 "01100111" // /* MW 3 */
+ 5598 "00000001" // /* MW 2 */
+ 5599 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+ 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5601 "00000000" // /* MW 15 */
+ 5602 "00000000" // /* MW 14 */
+ 5603 "01111000" // /* MW 13 */
+ 5604 "10100101" // /* MW 12 */
+ 5605 "00000001" // /* MW 11 */
+ 5606 "11110100" // /* MW 10 */
+ 5607 "01010010" // /* MW 9 */
+ 5608 "00001100" // /* MW 8 */
+ 5609 "01011011" // /* MW 7 */
+ 5610 "00000001" // /* MW 6 */
+ 5611 "00100000" // /* MW 5 */
+ 5612 "00000000" // /* MW 4 */
+ 5613 "11110000" // /* MW 3 */
+ 5614 "00101100" // /* MW 2 */
+ 5615 "00000000" // /* MW 1 */
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5617 "00010000" // /* MW 11 */
+ 5618 "01111000" // /* MW 10 */
+ 5619 "10110010" // /* MW 9 */
+ 5620 "11110011" // /* MW 8 */
+ 5621 "00000001" // /* MW 7 */
+ 5622 "10000000" // /* MW 6 */
+ 5623 "10100101" // /* MW 5 */
+ 5624 "11111101" // /* MW 4 */
+ 5625 "10000111" // /* MW 3 */
+ 5626 "10001010" // /* MW 2 */
+ 5627 "00000100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16
+ 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5629 "01111000" // /* MW 11 */
+ 5630 "00111001" // /* MW 10 */
+ 5631 "10001011" // /* MW 9 */
+ 5632 "00001000" // /* MW 8 */
+ 5633 "01010000" // /* MW 7 */
+ 5634 "10000000" // /* MW 6 */
+ 5635 "01100101" // /* MW 5 */
+ 5636 "11111010" // /* MW 4 */
+ 5637 "01010111" // /* MW 3 */
+ 5638 "11011100" // /* MW 2 */
+ 5639 "11100000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1289 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+ 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5641 "01111000" // /* MW 11 */
+ 5642 "01001001" // /* MW 10 */
+ 5643 "00000010" // /* MW 9 */
+ 5644 "11101000" // /* MW 8 */
+ 5645 "01100111" // /* MW 7 */
+ 5646 "00111111" // /* MW 6 */
+ 5647 "10001011" // /* MW 5 */
+ 5648 "10000100" // /* MW 4 */
+ 5649 "11010111" // /* MW 3 */
+ 5650 "00011010" // /* MW 2 */
+ 5651 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 1280 49
+ 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5653 "01111000" // /* MW 9 */
+ 5654 "01001001" // /* MW 8 */
+ 5655 "00000010" // /* MW 7 */
+ 5656 "00000001" // /* MW 6 */
+ 5657 "11010010" // /* MW 5 */
+ 5658 "00000010" // /* MW 4 */
+ 5659 "00000000" // /* MW 3 */
+ 5660 "11111000" // /* MW 2 */
+ 5661 "00000011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first
+ 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5663 "00010000" // /* MW 9 */
+ 5664 "01000000" // /* MW 8 */
+ 5665 "01111011" // /* MW 7 */
+ 5666 "00000100" // /* MW 6 */
+ 5667 "00000000" // /* MW 5 */
+ 5668 "00000000" // /* MW 4 */
+ 5669 "00000000" // /* MW 3 */
+ 5670 "00011001" // /* MW 2 */
+ 5671 "00000010" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+ 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00010000" // /* MW 9 */
+ 5674 "01100000" // /* MW 8 */
+ 5675 "10111100" // /* MW 7 */
+ 5676 "00000101" // /* MW 6 */
+ 5677 "00000000" // /* MW 5 */
+ 5678 "00000000" // /* MW 4 */
+ 5679 "10110000" // /* MW 3 */
+ 5680 "10010100" // /* MW 2 */
+ 5681 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98
+ 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5683 "00000001" // /* MW 3 */
+ 5684 "01110100" // /* MW 2 */
+ 5685 "00010000" // /* MW 1 */
+ 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5687 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1286 72
+.src_ref 7 "accum.hpp" 1108 103
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5689 "10000000" // /* MW 3 */
+ 5690 "11111010" // /* MW 2 */
+ 5691 "00010101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5693 "00000000" // /* MW 7 */
+ 5694 "10000000" // /* MW 6 */
+ 5695 "10111001" // /* MW 5 */
+ 5696 "00000010" // /* MW 4 */
+ 5697 "11000000" // /* MW 3 */
+ 5698 "00000010" // /* MW 2 */
+ 5699 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5701 "10010010" // /* MW 3 */
+ 5702 "10100000" // /* MW 2 */
+ 5703 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5705 "10000011" // /* MW 7 */
+ 5706 "01000000" // /* MW 6 */
+ 5707 "00010000" // /* MW 5 */
+ 5708 "11100110" // /* MW 4 */
+ 5709 "10010010" // /* MW 3 */
+ 5710 "10100110" // /* MW 2 */
+ 5711 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5713 "10010010" // /* MW 3 */
+ 5714 "00101010" // /* MW 2 */
+ 5715 "00011011" // /* MW 1 */
+ 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5717 "00000000" // /* MW 1 */
+ 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5719 "00000000" // /* MW 1 */
+ 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5721 "00000000" // /* MW 1 */
+ 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5723 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5725 "00010110" // /* MW 3 */
+ 5726 "11000000" // /* MW 2 */
+ 5727 "00001001" // /* MW 1 */
+ 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5729 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5731 "10000011" // /* MW 3 */
+ 5732 "00000110" // /* MW 2 */
+ 5733 "00010000" // /* MW 1 */
+ 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5735 "00000000" // /* MW 1 */
+ 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5737 "00000000" // /* MW 1 */
+ 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5739 "00000000" // /* MW 1 */
+ 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5741 "00000000" // /* MW 1 */
+ 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5743 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+ 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00000000" // /* MW 15 */
+ 5746 "00000000" // /* MW 14 */
+ 5747 "01111000" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00001000" // /* MW 10 */
+ 5751 "01110001" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "00010110" // /* MW 7 */
+ 5754 "11000000" // /* MW 6 */
+ 5755 "00100010" // /* MW 5 */
+ 5756 "00000000" // /* MW 4 */
+ 5757 "11110000" // /* MW 3 */
+ 5758 "00101100" // /* MW 2 */
+ 5759 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first
+.begin_of_loop
+.loop_nesting 1
+ 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5761 "00100101" // /* MW 5 */
+ 5762 "10100101" // /* MW 4 */
+ 5763 "10001001" // /* MW 3 */
+ 5764 "10111110" // /* MW 2 */
+ 5765 "00100011" // /* MW 1 */
+ 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5767 "10010010" // /* MW 3 */
+ 5768 "11010110" // /* MW 2 */
+ 5769 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49
+ 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5771 "11000000" // /* MW 3 */
+ 5772 "00011110" // /* MW 2 */
+ 5773 "00011111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49 first
+ 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5775 "10000100" // /* MW 3 */
+ 5776 "00111011" // /* MW 2 */
+ 5777 "00010111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1285 72 first
+ 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5779 "11100000" // /* MW 5 */
+ 5780 "00111101" // /* MW 4 */
+ 5781 "01011110" // /* MW 3 */
+ 5782 "11001001" // /* MW 2 */
+ 5783 "11101110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+ 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5785 "11011101" // /* MW 3 */
+ 5786 "10111101" // /* MW 2 */
+ 5787 "00010101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98 first
+ 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5789 "11010001" // /* MW 3 */
+ 5790 "10111111" // /* MW 2 */
+ 5791 "00010110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "add_reduce.hpp" 322 47 first
+ 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5793 "11001101" // /* MW 5 */
+ 5794 "01110000" // /* MW 4 */
+ 5795 "01001000" // /* MW 3 */
+ 5796 "10111100" // /* MW 2 */
+ 5797 "00101111" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+ 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5799 "10010010" // /* MW 3 */
+ 5800 "00010000" // /* MW 2 */
+ 5801 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 7 "accum.hpp" 198 120
+ 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5803 "00100010" // /* MW 3 */
+ 5804 "01001110" // /* MW 2 */
+ 5805 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 7 "accum.hpp" 198 120 first
+ 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5807 "00100010" // /* MW 3 */
+ 5808 "01001111" // /* MW 2 */
+ 5809 "00011101" // /* MW 1 */
+ 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5811 "10010010" // /* MW 3 */
+ 5812 "10010000" // /* MW 2 */
+ 5813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5815 "10010010" // /* MW 3 */
+ 5816 "10010100" // /* MW 2 */
+ 5817 "00011011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5819 "00111101" // /* MW 7 */
+ 5820 "00101000" // /* MW 6 */
+ 5821 "00010011" // /* MW 5 */
+ 5822 "11100110" // /* MW 4 */
+ 5823 "10001010" // /* MW 3 */
+ 5824 "00010010" // /* MW 2 */
+ 5825 "00000010" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5827 "10001010" // /* MW 3 */
+ 5828 "00001110" // /* MW 2 */
+ 5829 "00011001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5831 "00111101" // /* MW 7 */
+ 5832 "01010000" // /* MW 6 */
+ 5833 "00010010" // /* MW 5 */
+ 5834 "11100110" // /* MW 4 */
+ 5835 "00100010" // /* MW 3 */
+ 5836 "01001110" // /* MW 2 */
+ 5837 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5839 "10010010" // /* MW 3 */
+ 5840 "00001110" // /* MW 2 */
+ 5841 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5843 "01100110" // /* MW 3 */
+ 5844 "11000000" // /* MW 2 */
+ 5845 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5847 "00111101" // /* MW 7 */
+ 5848 "00110000" // /* MW 6 */
+ 5849 "00010100" // /* MW 5 */
+ 5850 "11100110" // /* MW 4 */
+ 5851 "10010010" // /* MW 3 */
+ 5852 "00010000" // /* MW 2 */
+ 5853 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5855 "10010010" // /* MW 3 */
+ 5856 "00010010" // /* MW 2 */
+ 5857 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 151 136 first
+ 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5859 "00010010" // /* MW 3 */
+ 5860 "00101100" // /* MW 2 */
+ 5861 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 7 "accum.hpp" 151 115
+ 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5863 "00100010" // /* MW 3 */
+ 5864 "11010001" // /* MW 2 */
+ 5865 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5867 "01100110" // /* MW 3 */
+ 5868 "01001000" // /* MW 2 */
+ 5869 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5871 "00111101" // /* MW 7 */
+ 5872 "01100100" // /* MW 6 */
+ 5873 "00010001" // /* MW 5 */
+ 5874 "11100110" // /* MW 4 */
+ 5875 "10010010" // /* MW 3 */
+ 5876 "00010000" // /* MW 2 */
+ 5877 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5879 "10010010" // /* MW 3 */
+ 5880 "00010010" // /* MW 2 */
+ 5881 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22
+ 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5883 "00010010" // /* MW 3 */
+ 5884 "00101000" // /* MW 2 */
+ 5885 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5887 "00011110" // /* MW 3 */
+ 5888 "01000000" // /* MW 2 */
+ 5889 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5891 "00111101" // /* MW 7 */
+ 5892 "01001100" // /* MW 6 */
+ 5893 "00010010" // /* MW 5 */
+ 5894 "11100110" // /* MW 4 */
+ 5895 "00010010" // /* MW 3 */
+ 5896 "00110000" // /* MW 2 */
+ 5897 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5899 "10010010" // /* MW 3 */
+ 5900 "00010100" // /* MW 2 */
+ 5901 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5903 "00111101" // /* MW 7 */
+ 5904 "10001100" // /* MW 6 */
+ 5905 "00010011" // /* MW 5 */
+ 5906 "11000110" // /* MW 4 */
+ 5907 "00011110" // /* MW 3 */
+ 5908 "01000000" // /* MW 2 */
+ 5909 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5911 "10010010" // /* MW 3 */
+ 5912 "00010000" // /* MW 2 */
+ 5913 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5915 "00010010" // /* MW 3 */
+ 5916 "00100100" // /* MW 2 */
+ 5917 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5919 "00111101" // /* MW 7 */
+ 5920 "00110000" // /* MW 6 */
+ 5921 "00010001" // /* MW 5 */
+ 5922 "11000110" // /* MW 4 */
+ 5923 "00011110" // /* MW 3 */
+ 5924 "01000000" // /* MW 2 */
+ 5925 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5927 "10010010" // /* MW 3 */
+ 5928 "00010000" // /* MW 2 */
+ 5929 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5931 "00010010" // /* MW 3 */
+ 5932 "00101000" // /* MW 2 */
+ 5933 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5935 "00111101" // /* MW 7 */
+ 5936 "01010000" // /* MW 6 */
+ 5937 "00010010" // /* MW 5 */
+ 5938 "11000110" // /* MW 4 */
+ 5939 "00000010" // /* MW 3 */
+ 5940 "01000000" // /* MW 2 */
+ 5941 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5943 "10010010" // /* MW 3 */
+ 5944 "00010000" // /* MW 2 */
+ 5945 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5947 "00010010" // /* MW 3 */
+ 5948 "00101100" // /* MW 2 */
+ 5949 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5951 "00000010" // /* MW 3 */
+ 5952 "01000000" // /* MW 2 */
+ 5953 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5955 "00111101" // /* MW 7 */
+ 5956 "01110000" // /* MW 6 */
+ 5957 "00010011" // /* MW 5 */
+ 5958 "11100110" // /* MW 4 */
+ 5959 "00010010" // /* MW 3 */
+ 5960 "00100100" // /* MW 2 */
+ 5961 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5963 "10010010" // /* MW 3 */
+ 5964 "00010000" // /* MW 2 */
+ 5965 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5967 "00111101" // /* MW 7 */
+ 5968 "00110000" // /* MW 6 */
+ 5969 "00010000" // /* MW 5 */
+ 5970 "11000110" // /* MW 4 */
+ 5971 "00000010" // /* MW 3 */
+ 5972 "01010000" // /* MW 2 */
+ 5973 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5975 "10010010" // /* MW 3 */
+ 5976 "00010100" // /* MW 2 */
+ 5977 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5979 "00010010" // /* MW 3 */
+ 5980 "00101000" // /* MW 2 */
+ 5981 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5983 "00000001" // /* MW 3 */
+ 5984 "11100010" // /* MW 2 */
+ 5985 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5987 "00010010" // /* MW 3 */
+ 5988 "00101100" // /* MW 2 */
+ 5989 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1288 16 first
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5991 "00000011" // /* MW 5 */
+ 5992 "01010100" // /* MW 4 */
+ 5993 "10000011" // /* MW 3 */
+ 5994 "11010000" // /* MW 2 */
+ 5995 "11100010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5997 "00010010" // /* MW 3 */
+ 5998 "10100000" // /* MW 2 */
+ 5999 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1287 41 first
+.src_ref 5 "broadcast.hpp" 80 25 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6001 "00000110" // /* MW 5 */
+ 6002 "10110100" // /* MW 4 */
+ 6003 "10001010" // /* MW 3 */
+ 6004 "11010100" // /* MW 2 */
+ 6005 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6007 "10100000" // /* MW 3 */
+ 6008 "11010100" // /* MW 2 */
+ 6009 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6011 "11010001" // /* MW 3 */
+ 6012 "00010000" // /* MW 2 */
+ 6013 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6015 "11110001" // /* MW 3 */
+ 6016 "00010010" // /* MW 2 */
+ 6017 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 1413 19 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "00100010" // /* MW 3 */
+ 6020 "11010011" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "00100010" // /* MW 3 */
+ 6024 "10010011" // /* MW 2 */
+ 6025 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6027 "00100010" // /* MW 3 */
+ 6028 "00010101" // /* MW 2 */
+ 6029 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6031 "00000000" // /* MW 3 */
+ 6032 "01011100" // /* MW 2 */
+ 6033 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6035 "00001000" // /* MW 3 */
+ 6036 "00001100" // /* MW 2 */
+ 6037 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6039 "10101000" // /* MW 3 */
+ 6040 "11000011" // /* MW 2 */
+ 6041 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6043 "10010010" // /* MW 3 */
+ 6044 "00001110" // /* MW 2 */
+ 6045 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6047 "10010010" // /* MW 3 */
+ 6048 "10101100" // /* MW 2 */
+ 6049 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6051 "01110000" // /* MW 7 */
+ 6052 "01001001" // /* MW 6 */
+ 6053 "00000111" // /* MW 5 */
+ 6054 "00000001" // /* MW 4 */
+ 6055 "11000000" // /* MW 3 */
+ 6056 "00000010" // /* MW 2 */
+ 6057 "01101000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6059 "10010010" // /* MW 3 */
+ 6060 "00110010" // /* MW 2 */
+ 6061 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+ 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6063 "10000011" // /* MW 9 */
+ 6064 "01001100" // /* MW 8 */
+ 6065 "00010010" // /* MW 7 */
+ 6066 "00001111" // /* MW 6 */
+ 6067 "11101010" // /* MW 5 */
+ 6068 "11101101" // /* MW 4 */
+ 6069 "11001101" // /* MW 3 */
+ 6070 "10111011" // /* MW 2 */
+ 6071 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "10100001" // /* MW 7 */
+ 6074 "11101100" // /* MW 6 */
+ 6075 "00010001" // /* MW 5 */
+ 6076 "10010001" // /* MW 4 */
+ 6077 "00111110" // /* MW 3 */
+ 6078 "00001011" // /* MW 2 */
+ 6079 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6081 "01100001" // /* MW 9 */
+ 6082 "11101100" // /* MW 8 */
+ 6083 "00010000" // /* MW 7 */
+ 6084 "00101111" // /* MW 6 */
+ 6085 "00001001" // /* MW 5 */
+ 6086 "00110011" // /* MW 4 */
+ 6087 "11100010" // /* MW 3 */
+ 6088 "10100101" // /* MW 2 */
+ 6089 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6091 "00000001" // /* MW 3 */
+ 6092 "11101100" // /* MW 2 */
+ 6093 "00010011" // /* MW 1 */
+ 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6095 "00000000" // /* MW 1 */
+ 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6097 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6099 "00010110" // /* MW 3 */
+ 6100 "11000001" // /* MW 2 */
+ 6101 "00001100" // /* MW 1 */
+ 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6103 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "10000011" // /* MW 3 */
+ 6106 "01010010" // /* MW 2 */
+ 6107 "00010010" // /* MW 1 */
+ 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6109 "00000000" // /* MW 1 */
+ 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6111 "00000000" // /* MW 1 */
+ 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6113 "00000000" // /* MW 1 */
+ 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6115 "00000000" // /* MW 1 */
+ 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6119 "00010110" // /* MW 3 */
+ 6120 "01000001" // /* MW 2 */
+ 6121 "00001100" // /* MW 1 */
+ 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6125 "10100001" // /* MW 3 */
+ 6126 "11110000" // /* MW 2 */
+ 6127 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6129 "01100001" // /* MW 3 */
+ 6130 "11110000" // /* MW 2 */
+ 6131 "00010010" // /* MW 1 */
+ 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6133 "00000000" // /* MW 1 */
+ 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6135 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6137 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6139 "10100001" // /* MW 3 */
+ 6140 "11110010" // /* MW 2 */
+ 6141 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6143 "00010010" // /* MW 3 */
+ 6144 "01110000" // /* MW 2 */
+ 6145 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6147 "00111101" // /* MW 3 */
+ 6148 "10001000" // /* MW 2 */
+ 6149 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6151 "10010010" // /* MW 3 */
+ 6152 "00000101" // /* MW 2 */
+ 6153 "00011100" // /* MW 1 */
+ 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6155 "00000000" // /* MW 1 */
+ 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6157 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6161 "00000001" // /* MW 3 */
+ 6162 "11100001" // /* MW 2 */
+ 6163 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6165 "00010010" // /* MW 3 */
+ 6166 "01110000" // /* MW 2 */
+ 6167 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6169 "00111101" // /* MW 3 */
+ 6170 "10001000" // /* MW 2 */
+ 6171 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6173 "10010010" // /* MW 3 */
+ 6174 "00000001" // /* MW 2 */
+ 6175 "00011100" // /* MW 1 */
+ 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6177 "00000000" // /* MW 1 */
+ 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6179 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6181 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6183 "01100001" // /* MW 3 */
+ 6184 "11110010" // /* MW 2 */
+ 6185 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6187 "00010010" // /* MW 3 */
+ 6188 "01110000" // /* MW 2 */
+ 6189 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6191 "00111101" // /* MW 3 */
+ 6192 "10000100" // /* MW 2 */
+ 6193 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6195 "10010010" // /* MW 3 */
+ 6196 "00000101" // /* MW 2 */
+ 6197 "00011100" // /* MW 1 */
+ 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6199 "00000000" // /* MW 1 */
+ 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6201 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6203 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6205 "00000001" // /* MW 3 */
+ 6206 "11110010" // /* MW 2 */
+ 6207 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6209 "00010010" // /* MW 3 */
+ 6210 "01110000" // /* MW 2 */
+ 6211 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6213 "00111101" // /* MW 3 */
+ 6214 "10000100" // /* MW 2 */
+ 6215 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6217 "10010010" // /* MW 3 */
+ 6218 "00000001" // /* MW 2 */
+ 6219 "00011100" // /* MW 1 */
+ 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6221 "00000000" // /* MW 1 */
+ 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6223 "00000000" // /* MW 1 */
+ 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6225 "00000000" // /* MW 1 */
+ 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6227 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6229 "00010010" // /* MW 3 */
+ 6230 "01110000" // /* MW 2 */
+ 6231 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6233 "00111101" // /* MW 3 */
+ 6234 "10001000" // /* MW 2 */
+ 6235 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6237 "10010010" // /* MW 3 */
+ 6238 "00010101" // /* MW 2 */
+ 6239 "00011100" // /* MW 1 */
+ 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6241 "00000000" // /* MW 1 */
+ 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6243 "00000000" // /* MW 1 */
+ 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6245 "00000000" // /* MW 1 */
+ 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6247 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6249 "00010010" // /* MW 3 */
+ 6250 "01101000" // /* MW 2 */
+ 6251 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6253 "00111101" // /* MW 3 */
+ 6254 "01000100" // /* MW 2 */
+ 6255 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6257 "10010010" // /* MW 3 */
+ 6258 "00010001" // /* MW 2 */
+ 6259 "00011010" // /* MW 1 */
+ 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6261 "00000000" // /* MW 1 */
+ 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6263 "00000000" // /* MW 1 */
+ 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6265 "00000000" // /* MW 1 */
+ 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6267 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "00010010" // /* MW 3 */
+ 6270 "01101000" // /* MW 2 */
+ 6271 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "00111101" // /* MW 3 */
+ 6274 "00100000" // /* MW 2 */
+ 6275 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "10010010" // /* MW 3 */
+ 6278 "00010101" // /* MW 2 */
+ 6279 "00011001" // /* MW 1 */
+ 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6281 "00000000" // /* MW 1 */
+ 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6283 "00000000" // /* MW 1 */
+ 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6285 "00000000" // /* MW 1 */
+ 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6287 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6289 "00010010" // /* MW 3 */
+ 6290 "01100000" // /* MW 2 */
+ 6291 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6293 "00111101" // /* MW 3 */
+ 6294 "00001100" // /* MW 2 */
+ 6295 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6297 "10010010" // /* MW 3 */
+ 6298 "00010001" // /* MW 2 */
+ 6299 "00011000" // /* MW 1 */
+ 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6301 "00000000" // /* MW 1 */
+ 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6303 "00000000" // /* MW 1 */
+ 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6305 "00000000" // /* MW 1 */
+ 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6307 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 72 first
+.src_ref 7 "accum.hpp" 1108 103 first
+ 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6309 "00010110" // /* MW 3 */
+ 6310 "11000000" // /* MW 2 */
+ 6311 "00001101" // /* MW 1 */
+ 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6313 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 41
+ 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6315 "11111110" // /* MW 3 */
+ 6316 "10000101" // /* MW 2 */
+ 6317 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1289 16 first
+ 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6319 "11001100" // /* MW 3 */
+ 6320 "11010101" // /* MW 2 */
+ 6321 "00011101" // /* MW 1 */
+ 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6323 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98 first
+.src_ref 5 "vector.hpp" 1292 26 first
+ 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6325 "01000001" // /* MW 11 */
+ 6326 "01100101" // /* MW 10 */
+ 6327 "10001011" // /* MW 9 */
+ 6328 "00000011" // /* MW 8 */
+ 6329 "00000000" // /* MW 7 */
+ 6330 "00000000" // /* MW 6 */
+ 6331 "00100000" // /* MW 5 */
+ 6332 "00000000" // /* MW 4 */
+ 6333 "11110000" // /* MW 3 */
+ 6334 "00101100" // /* MW 2 */
+ 6335 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first
+.end_of_loop
+ 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6337 "00000000" // /* MW 15 */
+ 6338 "00000000" // /* MW 14 */
+ 6339 "01111000" // /* MW 13 */
+ 6340 "10100101" // /* MW 12 */
+ 6341 "00000001" // /* MW 11 */
+ 6342 "00000000" // /* MW 10 */
+ 6343 "00000000" // /* MW 9 */
+ 6344 "10000000" // /* MW 8 */
+ 6345 "11101010" // /* MW 7 */
+ 6346 "10001010" // /* MW 6 */
+ 6347 "00100111" // /* MW 5 */
+ 6348 "00000000" // /* MW 4 */
+ 6349 "11110000" // /* MW 3 */
+ 6350 "00101100" // /* MW 2 */
+ 6351 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 6353 "00000000" // /* MW 5 */
+ 6354 "00000000" // /* MW 4 */
+ 6355 "01111000" // /* MW 3 */
+ 6356 "00001100" // /* MW 2 */
+ 6357 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6365 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6367 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "10000000" // /* MW 5 */
+ 6370 "10110100" // /* MW 4 */
+ 6371 "10110000" // /* MW 3 */
+ 6372 "10110100" // /* MW 2 */
+ 6373 "11111111" // /* MW 1 */
+ 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6375 "00000000" // /* MW 9 */
+ 6376 "00000000" // /* MW 8 */
+ 6377 "00000000" // /* MW 7 */
+ 6378 "10000000" // /* MW 6 */
+ 6379 "00111101" // /* MW 5 */
+ 6380 "11111000" // /* MW 4 */
+ 6381 "11110111" // /* MW 3 */
+ 6382 "00101100" // /* MW 2 */
+ 6383 "00000000" // /* MW 1 */
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+ 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6385 "10010001" // /* MW 3 */
+ 6386 "01100001" // /* MW 2 */
+ 6387 "00011111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+ 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6389 "11000001" // /* MW 5 */
+ 6390 "01100100" // /* MW 4 */
+ 6391 "01011011" // /* MW 3 */
+ 6392 "10001111" // /* MW 2 */
+ 6393 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6395 "01100000" // /* MW 3 */
+ 6396 "01111011" // /* MW 2 */
+ 6397 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6399 "01110111" // /* MW 3 */
+ 6400 "00000100" // /* MW 2 */
+ 6401 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 57 first
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 6403 "00000001" // /* MW 5 */
+ 6404 "00000000" // /* MW 4 */
+ 6405 "11111000" // /* MW 3 */
+ 6406 "00010011" // /* MW 2 */
+ 6407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6413 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6415 "00000111" // /* MW 3 */
+ 6416 "11000110" // /* MW 2 */
+ 6417 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.delay_slot
+ 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6419 "01100000" // /* MW 13 */
+ 6420 "00101011" // /* MW 12 */
+ 6421 "00000000" // /* MW 11 */
+ 6422 "10101111" // /* MW 10 */
+ 6423 "00110100" // /* MW 9 */
+ 6424 "00000000" // /* MW 8 */
+ 6425 "10110000" // /* MW 7 */
+ 6426 "11000000" // /* MW 6 */
+ 6427 "00100000" // /* MW 5 */
+ 6428 "00000000" // /* MW 4 */
+ 6429 "11110000" // /* MW 3 */
+ 6430 "00101100" // /* MW 2 */
+ 6431 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+.return_address
+ 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6433 "00111001" // /* MW 3 */
+ 6434 "11111000" // /* MW 2 */
+ 6435 "00000111" // /* MW 1 */
+ 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6437 "10011001" // /* MW 3 */
+ 6438 "11111100" // /* MW 2 */
+ 6439 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 23 first
+ 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6441 "01110111" // /* MW 3 */
+ 6442 "01010100" // /* MW 2 */
+ 6443 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4 first
+ 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6445 "00000001" // /* MW 5 */
+ 6446 "00000000" // /* MW 4 */
+ 6447 "00000000" // /* MW 3 */
+ 6448 "11100000" // /* MW 2 */
+ 6449 "11111111" // /* MW 1 */
+ 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6451 "00000000" // /* MW 1 */
+ 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6453 "00000000" // /* MW 1 */
+ 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6455 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+ 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6457 "00000000" // /* MW 3 */
+ 6458 "00101000" // /* MW 2 */
+ 6459 "00010000" // /* MW 1 */
+.delay_slot
+ 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6461 "11000000" // /* MW 3 */
+ 6462 "01100010" // /* MW 2 */
+ 6463 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6471 "01111110" // /* MW 9 */
+ 6472 "10100101" // /* MW 8 */
+ 6473 "00000001" // /* MW 7 */
+ 6474 "00000000" // /* MW 6 */
+ 6475 "00010000" // /* MW 5 */
+ 6476 "00000000" // /* MW 4 */
+ 6477 "11110000" // /* MW 3 */
+ 6478 "00101100" // /* MW 2 */
+ 6479 "00000000" // /* MW 1 */
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 5 "blend.hpp" 163 48
+ 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 6481 "00100000" // /* MW 9 */
+ 6482 "00000000" // /* MW 8 */
+ 6483 "00000000" // /* MW 7 */
+ 6484 "10111000" // /* MW 6 */
+ 6485 "00000010" // /* MW 5 */
+ 6486 "00000000" // /* MW 4 */
+ 6487 "00000000" // /* MW 3 */
+ 6488 "11110100" // /* MW 2 */
+ 6489 "00011111" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6491 "00000001" // /* MW 3 */
+ 6492 "00101010" // /* MW 2 */
+ 6493 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6501 "10000001" // /* MW 11 */
+ 6502 "10101101" // /* MW 10 */
+ 6503 "00000000" // /* MW 9 */
+ 6504 "00000000" // /* MW 8 */
+ 6505 "00000000" // /* MW 7 */
+ 6506 "00000000" // /* MW 6 */
+ 6507 "00100000" // /* MW 5 */
+ 6508 "00000000" // /* MW 4 */
+ 6509 "11110000" // /* MW 3 */
+ 6510 "00101100" // /* MW 2 */
+ 6511 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6513 "00010101" // /* MW 3 */
+ 6514 "00001010" // /* MW 2 */
+ 6515 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6517 "01100111" // /* MW 3 */
+ 6518 "01001010" // /* MW 2 */
+ 6519 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */
+ 6521 "00000001" // /* MW 5 */
+ 6522 "01000000" // /* MW 4 */
+ 6523 "00110000" // /* MW 3 */
+ 6524 "00001110" // /* MW 2 */
+ 6525 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6535 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6537 "00011001" // /* MW 3 */
+ 6538 "00001110" // /* MW 2 */
+ 6539 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6541 "01100111" // /* MW 3 */
+ 6542 "11001110" // /* MW 2 */
+ 6543 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */
+ 6545 "00000001" // /* MW 5 */
+ 6546 "01000000" // /* MW 4 */
+ 6547 "10101000" // /* MW 3 */
+ 6548 "00001110" // /* MW 2 */
+ 6549 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6551 "01000001" // /* MW 3 */
+ 6552 "00001010" // /* MW 2 */
+ 6553 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6555 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6557 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6559 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6561 "00000000" // /* MW 15 */
+ 6562 "00000000" // /* MW 14 */
+ 6563 "01111000" // /* MW 13 */
+ 6564 "10100101" // /* MW 12 */
+ 6565 "00000001" // /* MW 11 */
+ 6566 "00000000" // /* MW 10 */
+ 6567 "00000000" // /* MW 9 */
+ 6568 "00000000" // /* MW 8 */
+ 6569 "01011011" // /* MW 7 */
+ 6570 "00000001" // /* MW 6 */
+ 6571 "00100000" // /* MW 5 */
+ 6572 "00000000" // /* MW 4 */
+ 6573 "11110000" // /* MW 3 */
+ 6574 "00101100" // /* MW 2 */
+ 6575 "00000000" // /* MW 1 */
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30
+ 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6577 "01111000" // /* MW 9 */
+ 6578 "11110000" // /* MW 8 */
+ 6579 "01100000" // /* MW 7 */
+ 6580 "11101010" // /* MW 6 */
+ 6581 "00010000" // /* MW 5 */
+ 6582 "00000001" // /* MW 4 */
+ 6583 "01010000" // /* MW 3 */
+ 6584 "00011110" // /* MW 2 */
+ 6585 "01001000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first
+ 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6587 "00000101" // /* MW 5 */
+ 6588 "10100100" // /* MW 4 */
+ 6589 "00011000" // /* MW 3 */
+ 6590 "10001101" // /* MW 2 */
+ 6591 "10001001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 202 12
+ 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */
+ 6593 "00000001" // /* MW 5 */
+ 6594 "01000000" // /* MW 4 */
+ 6595 "00100000" // /* MW 3 */
+ 6596 "00001110" // /* MW 2 */
+ 6597 "00110000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6605 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first
+.delay_slot
+ 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6607 "01011110" // /* MW 3 */
+ 6608 "11001010" // /* MW 2 */
+ 6609 "00010001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22
+ 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6611 "00010000" // /* MW 11 */
+ 6612 "00001000" // /* MW 10 */
+ 6613 "01111101" // /* MW 9 */
+ 6614 "00000100" // /* MW 8 */
+ 6615 "00000000" // /* MW 7 */
+ 6616 "00000000" // /* MW 6 */
+ 6617 "10001011" // /* MW 5 */
+ 6618 "10000100" // /* MW 4 */
+ 6619 "10000000" // /* MW 3 */
+ 6620 "10001010" // /* MW 2 */
+ 6621 "00000100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+ 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6623 "00010000" // /* MW 11 */
+ 6624 "00111000" // /* MW 10 */
+ 6625 "10111101" // /* MW 9 */
+ 6626 "00000101" // /* MW 8 */
+ 6627 "00000000" // /* MW 7 */
+ 6628 "10000000" // /* MW 6 */
+ 6629 "10100101" // /* MW 5 */
+ 6630 "11111101" // /* MW 4 */
+ 6631 "11010111" // /* MW 3 */
+ 6632 "00011110" // /* MW 2 */
+ 6633 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+ 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6635 "00010101" // /* MW 3 */
+ 6636 "00011101" // /* MW 2 */
+ 6637 "00000000" // /* MW 1 */
+ 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6639 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+ 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6641 "10010010" // /* MW 3 */
+ 6642 "11000010" // /* MW 2 */
+ 6643 "00011100" // /* MW 1 */
+ 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6645 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 5 "add.hpp" 28 49 first
+ 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6647 "00111101" // /* MW 7 */
+ 6648 "01101000" // /* MW 6 */
+ 6649 "00010001" // /* MW 5 */
+ 6650 "11100110" // /* MW 4 */
+ 6651 "00010010" // /* MW 3 */
+ 6652 "00010011" // /* MW 2 */
+ 6653 "00000011" // /* MW 1 */
+ 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6655 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+ 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6657 "00000000" // /* MW 15 */
+ 6658 "00000000" // /* MW 14 */
+ 6659 "11001000" // /* MW 13 */
+ 6660 "11111111" // /* MW 12 */
+ 6661 "10111001" // /* MW 11 */
+ 6662 "00000010" // /* MW 10 */
+ 6663 "00000000" // /* MW 9 */
+ 6664 "00000000" // /* MW 8 */
+ 6665 "01011011" // /* MW 7 */
+ 6666 "00000001" // /* MW 6 */
+ 6667 "00100000" // /* MW 5 */
+ 6668 "00000000" // /* MW 4 */
+ 6669 "11110000" // /* MW 3 */
+ 6670 "00101100" // /* MW 2 */
+ 6671 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+.begin_of_loop
+.loop_nesting 1
+ 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6673 "00000000" // /* MW 15 */
+ 6674 "00000000" // /* MW 14 */
+ 6675 "01111000" // /* MW 13 */
+ 6676 "10100101" // /* MW 12 */
+ 6677 "00000001" // /* MW 11 */
+ 6678 "00000000" // /* MW 10 */
+ 6679 "00000000" // /* MW 9 */
+ 6680 "00000000" // /* MW 8 */
+ 6681 "01011011" // /* MW 7 */
+ 6682 "00000001" // /* MW 6 */
+ 6683 "00100000" // /* MW 5 */
+ 6684 "00000000" // /* MW 4 */
+ 6685 "10110000" // /* MW 3 */
+ 6686 "10100010" // /* MW 2 */
+ 6687 "00000011" // /* MW 1 */
+ 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6689 "00000000" // /* MW 15 */
+ 6690 "00000000" // /* MW 14 */
+ 6691 "01111000" // /* MW 13 */
+ 6692 "10100101" // /* MW 12 */
+ 6693 "00000001" // /* MW 11 */
+ 6694 "00000000" // /* MW 10 */
+ 6695 "00000000" // /* MW 9 */
+ 6696 "00000000" // /* MW 8 */
+ 6697 "01011011" // /* MW 7 */
+ 6698 "00000001" // /* MW 6 */
+ 6699 "00100000" // /* MW 5 */
+ 6700 "00000000" // /* MW 4 */
+ 6701 "11110000" // /* MW 3 */
+ 6702 "00101100" // /* MW 2 */
+ 6703 "00000000" // /* MW 1 */
+ 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6705 "00000000" // /* MW 15 */
+ 6706 "00000000" // /* MW 14 */
+ 6707 "01111000" // /* MW 13 */
+ 6708 "10100101" // /* MW 12 */
+ 6709 "00000001" // /* MW 11 */
+ 6710 "00000000" // /* MW 10 */
+ 6711 "00000000" // /* MW 9 */
+ 6712 "00000000" // /* MW 8 */
+ 6713 "01011011" // /* MW 7 */
+ 6714 "00000001" // /* MW 6 */
+ 6715 "00100000" // /* MW 5 */
+ 6716 "00000000" // /* MW 4 */
+ 6717 "11110000" // /* MW 3 */
+ 6718 "00101100" // /* MW 2 */
+ 6719 "00000000" // /* MW 1 */
+ 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6721 "00000000" // /* MW 15 */
+ 6722 "00000000" // /* MW 14 */
+ 6723 "01111000" // /* MW 13 */
+ 6724 "10100101" // /* MW 12 */
+ 6725 "00000001" // /* MW 11 */
+ 6726 "00000000" // /* MW 10 */
+ 6727 "00000000" // /* MW 9 */
+ 6728 "00000000" // /* MW 8 */
+ 6729 "01011011" // /* MW 7 */
+ 6730 "00000001" // /* MW 6 */
+ 6731 "00100000" // /* MW 5 */
+ 6732 "00000000" // /* MW 4 */
+ 6733 "11110000" // /* MW 3 */
+ 6734 "00101100" // /* MW 2 */
+ 6735 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6737 "00000000" // /* MW 15 */
+ 6738 "00000000" // /* MW 14 */
+ 6739 "01111000" // /* MW 13 */
+ 6740 "00001001" // /* MW 12 */
+ 6741 "01100010" // /* MW 11 */
+ 6742 "00000010" // /* MW 10 */
+ 6743 "00000000" // /* MW 9 */
+ 6744 "00000000" // /* MW 8 */
+ 6745 "01011011" // /* MW 7 */
+ 6746 "00000001" // /* MW 6 */
+ 6747 "00100000" // /* MW 5 */
+ 6748 "00000000" // /* MW 4 */
+ 6749 "11110000" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "01000001" // /* MW 15 */
+ 6754 "10001011" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "01011011" // /* MW 7 */
+ 6762 "00000001" // /* MW 6 */
+ 6763 "00100000" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.src_ref 7 "accum.hpp" 199 120 first
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6769 "00000000" // /* MW 15 */
+ 6770 "00000000" // /* MW 14 */
+ 6771 "01111000" // /* MW 13 */
+ 6772 "10001001" // /* MW 12 */
+ 6773 "10001001" // /* MW 11 */
+ 6774 "00000001" // /* MW 10 */
+ 6775 "00000000" // /* MW 9 */
+ 6776 "00000000" // /* MW 8 */
+ 6777 "01011011" // /* MW 7 */
+ 6778 "00000001" // /* MW 6 */
+ 6779 "00100000" // /* MW 5 */
+ 6780 "00000000" // /* MW 4 */
+ 6781 "11110000" // /* MW 3 */
+ 6782 "00101100" // /* MW 2 */
+ 6783 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+.loop_nesting 0
+ 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6785 "00010000" // /* MW 9 */
+ 6786 "01111000" // /* MW 8 */
+ 6787 "10110010" // /* MW 7 */
+ 6788 "11110011" // /* MW 6 */
+ 6789 "00000001" // /* MW 5 */
+ 6790 "00000000" // /* MW 4 */
+ 6791 "00000000" // /* MW 3 */
+ 6792 "00010000" // /* MW 2 */
+ 6793 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6795 "01011000" // /* MW 9 */
+ 6796 "00000001" // /* MW 8 */
+ 6797 "10011000" // /* MW 7 */
+ 6798 "00001000" // /* MW 6 */
+ 6799 "01100001" // /* MW 5 */
+ 6800 "00000000" // /* MW 4 */
+ 6801 "01010000" // /* MW 3 */
+ 6802 "10010000" // /* MW 2 */
+ 6803 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6805 "00000101" // /* MW 3 */
+ 6806 "00100010" // /* MW 2 */
+ 6807 "00010000" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6809 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6811 "00010010" // /* MW 3 */
+ 6812 "11000100" // /* MW 2 */
+ 6813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6817 "00010010" // /* MW 3 */
+ 6818 "00110011" // /* MW 2 */
+ 6819 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6821 "00010010" // /* MW 3 */
+ 6822 "00010000" // /* MW 2 */
+ 6823 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 1108 103
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6825 "00111101" // /* MW 9 */
+ 6826 "01000000" // /* MW 8 */
+ 6827 "00010000" // /* MW 7 */
+ 6828 "00101111" // /* MW 6 */
+ 6829 "01001001" // /* MW 5 */
+ 6830 "00000000" // /* MW 4 */
+ 6831 "10000000" // /* MW 3 */
+ 6832 "00111010" // /* MW 2 */
+ 6833 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6835 "00010010" // /* MW 3 */
+ 6836 "00010011" // /* MW 2 */
+ 6837 "00011010" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+ 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6839 "01110010" // /* MW 3 */
+ 6840 "00010110" // /* MW 2 */
+ 6841 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+ 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6843 "10010010" // /* MW 3 */
+ 6844 "00000100" // /* MW 2 */
+ 6845 "00011001" // /* MW 1 */
+ 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6847 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+ 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "00010010" // /* MW 3 */
+ 6850 "00000100" // /* MW 2 */
+ 6851 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "00010010" // /* MW 3 */
+ 6854 "00100000" // /* MW 2 */
+ 6855 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6857 "00111101" // /* MW 7 */
+ 6858 "00001100" // /* MW 6 */
+ 6859 "00010000" // /* MW 5 */
+ 6860 "11000110" // /* MW 4 */
+ 6861 "01000010" // /* MW 3 */
+ 6862 "00010000" // /* MW 2 */
+ 6863 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6865 "10010010" // /* MW 3 */
+ 6866 "00000100" // /* MW 2 */
+ 6867 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6869 "10010010" // /* MW 3 */
+ 6870 "00100000" // /* MW 2 */
+ 6871 "00011001" // /* MW 1 */
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6873 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+ 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6875 "10010110" // /* MW 3 */
+ 6876 "01000000" // /* MW 2 */
+ 6877 "00001000" // /* MW 1 */
+ 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6879 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6881 "10000011" // /* MW 7 */
+ 6882 "01000000" // /* MW 6 */
+ 6883 "00010100" // /* MW 5 */
+ 6884 "11100110" // /* MW 4 */
+ 6885 "00010010" // /* MW 3 */
+ 6886 "10100000" // /* MW 2 */
+ 6887 "00000001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6889 "00111101" // /* MW 7 */
+ 6890 "00001000" // /* MW 6 */
+ 6891 "00010000" // /* MW 5 */
+ 6892 "11000110" // /* MW 4 */
+ 6893 "00011010" // /* MW 3 */
+ 6894 "10011000" // /* MW 2 */
+ 6895 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6897 "10010010" // /* MW 3 */
+ 6898 "00000110" // /* MW 2 */
+ 6899 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6901 "10010010" // /* MW 3 */
+ 6902 "10100100" // /* MW 2 */
+ 6903 "00011001" // /* MW 1 */
+ 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6907 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6909 "00010110" // /* MW 3 */
+ 6910 "01000010" // /* MW 2 */
+ 6911 "00001001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6913 "00010010" // /* MW 3 */
+ 6914 "10100000" // /* MW 2 */
+ 6915 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6917 "00111101" // /* MW 7 */
+ 6918 "00001000" // /* MW 6 */
+ 6919 "00010000" // /* MW 5 */
+ 6920 "11000110" // /* MW 4 */
+ 6921 "00000010" // /* MW 3 */
+ 6922 "00101000" // /* MW 2 */
+ 6923 "00000011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10010010" // /* MW 3 */
+ 6926 "00001100" // /* MW 2 */
+ 6927 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "10010010" // /* MW 3 */
+ 6930 "10100110" // /* MW 2 */
+ 6931 "00011010" // /* MW 1 */
+ 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6933 "00000000" // /* MW 1 */
+ 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6935 "00000000" // /* MW 1 */
+ 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6937 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6939 "00010010" // /* MW 3 */
+ 6940 "00100000" // /* MW 2 */
+ 6941 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6943 "00000001" // /* MW 3 */
+ 6944 "00011010" // /* MW 2 */
+ 6945 "00011000" // /* MW 1 */
+ 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6947 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 856 23 first
+ 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6949 "00010001" // /* MW 3 */
+ 6950 "00000000" // /* MW 2 */
+ 6951 "00011011" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6953 "00001000" // /* MW 3 */
+ 6954 "10001011" // /* MW 2 */
+ 6955 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6957 "10010010" // /* MW 3 */
+ 6958 "00000010" // /* MW 2 */
+ 6959 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6961 "10010010" // /* MW 3 */
+ 6962 "10101010" // /* MW 2 */
+ 6963 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6965 "01110000" // /* MW 7 */
+ 6966 "01001001" // /* MW 6 */
+ 6967 "10010001" // /* MW 5 */
+ 6968 "00000001" // /* MW 4 */
+ 6969 "11000000" // /* MW 3 */
+ 6970 "00100010" // /* MW 2 */
+ 6971 "01011000" // /* MW 1 */
+ 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6973 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6975 "10000011" // /* MW 3 */
+ 6976 "01001010" // /* MW 2 */
+ 6977 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+ 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6979 "01000001" // /* MW 3 */
+ 6980 "11101010" // /* MW 2 */
+ 6981 "00010000" // /* MW 1 */
+ 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6983 "00000000" // /* MW 1 */
+ 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6985 "00000000" // /* MW 1 */
+ 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6987 "00000000" // /* MW 1 */
+ 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6989 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6991 "10010110" // /* MW 3 */
+ 6992 "11000000" // /* MW 2 */
+ 6993 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6995 "10000011" // /* MW 3 */
+ 6996 "10000100" // /* MW 2 */
+ 6997 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6999 "10000011" // /* MW 3 */
+ 7000 "00100010" // /* MW 2 */
+ 7001 "00010011" // /* MW 1 */
+ 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7003 "00000000" // /* MW 1 */
+ 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7005 "00000000" // /* MW 1 */
+ 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7007 "00000000" // /* MW 1 */
+ 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7009 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7011 "00010110" // /* MW 3 */
+ 7012 "11000010" // /* MW 2 */
+ 7013 "00001001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7015 "10010110" // /* MW 3 */
+ 7016 "01000001" // /* MW 2 */
+ 7017 "00001011" // /* MW 1 */
+ 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7019 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7021 "01100001" // /* MW 3 */
+ 7022 "11101100" // /* MW 2 */
+ 7023 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7025 "01000001" // /* MW 3 */
+ 7026 "11101100" // /* MW 2 */
+ 7027 "00010011" // /* MW 1 */
+ 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7029 "00000000" // /* MW 1 */
+ 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7031 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7033 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7035 "01100001" // /* MW 3 */
+ 7036 "11100010" // /* MW 2 */
+ 7037 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "00010010" // /* MW 3 */
+ 7040 "01101000" // /* MW 2 */
+ 7041 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7043 "00111101" // /* MW 3 */
+ 7044 "01001100" // /* MW 2 */
+ 7045 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7047 "10010010" // /* MW 3 */
+ 7048 "00000101" // /* MW 2 */
+ 7049 "00011010" // /* MW 1 */
+ 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7051 "00000000" // /* MW 1 */
+ 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7053 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7055 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7057 "01100001" // /* MW 3 */
+ 7058 "11101010" // /* MW 2 */
+ 7059 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00010010" // /* MW 3 */
+ 7062 "01101000" // /* MW 2 */
+ 7063 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7065 "00111101" // /* MW 3 */
+ 7066 "01001100" // /* MW 2 */
+ 7067 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7069 "10010010" // /* MW 3 */
+ 7070 "00000001" // /* MW 2 */
+ 7071 "00011010" // /* MW 1 */
+ 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7073 "00000000" // /* MW 1 */
+ 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7075 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7077 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7079 "01000001" // /* MW 3 */
+ 7080 "11100010" // /* MW 2 */
+ 7081 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7083 "00010010" // /* MW 3 */
+ 7084 "01101000" // /* MW 2 */
+ 7085 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7087 "00111101" // /* MW 3 */
+ 7088 "01001100" // /* MW 2 */
+ 7089 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7091 "10010010" // /* MW 3 */
+ 7092 "00000101" // /* MW 2 */
+ 7093 "00011010" // /* MW 1 */
+ 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7095 "00000000" // /* MW 1 */
+ 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7097 "00000000" // /* MW 1 */
+ 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7099 "00000000" // /* MW 1 */
+ 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7101 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7103 "00010010" // /* MW 3 */
+ 7104 "01101000" // /* MW 2 */
+ 7105 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7107 "00111101" // /* MW 3 */
+ 7108 "01001100" // /* MW 2 */
+ 7109 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7111 "10010010" // /* MW 3 */
+ 7112 "00000001" // /* MW 2 */
+ 7113 "00011010" // /* MW 1 */
+ 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7115 "00000000" // /* MW 1 */
+ 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+ 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "11000001" // /* MW 3 */
+ 7120 "11100000" // /* MW 2 */
+ 7121 "00010011" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7125 "00010010" // /* MW 3 */
+ 7126 "01101000" // /* MW 2 */
+ 7127 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7129 "00111101" // /* MW 3 */
+ 7130 "01001100" // /* MW 2 */
+ 7131 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7133 "10010010" // /* MW 3 */
+ 7134 "00000101" // /* MW 2 */
+ 7135 "00011010" // /* MW 1 */
+ 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7137 "00000000" // /* MW 1 */
+ 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7139 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+ 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7141 "00000001" // /* MW 3 */
+ 7142 "11100010" // /* MW 2 */
+ 7143 "00010001" // /* MW 1 */
+ 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7145 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00010010" // /* MW 3 */
+ 7148 "01101100" // /* MW 2 */
+ 7149 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "00111101" // /* MW 3 */
+ 7152 "01000100" // /* MW 2 */
+ 7153 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "10010010" // /* MW 3 */
+ 7156 "00000001" // /* MW 2 */
+ 7157 "00011010" // /* MW 1 */
+ 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7159 "00000000" // /* MW 1 */
+ 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7161 "00000000" // /* MW 1 */
+ 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7163 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7165 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7167 "00111101" // /* MW 7 */
+ 7168 "01000000" // /* MW 6 */
+ 7169 "00010000" // /* MW 5 */
+ 7170 "11100110" // /* MW 4 */
+ 7171 "00010010" // /* MW 3 */
+ 7172 "00100100" // /* MW 2 */
+ 7173 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7175 "00000001" // /* MW 7 */
+ 7176 "11101010" // /* MW 6 */
+ 7177 "00010100" // /* MW 5 */
+ 7178 "11100110" // /* MW 4 */
+ 7179 "10010010" // /* MW 3 */
+ 7180 "00000000" // /* MW 2 */
+ 7181 "00000010" // /* MW 1 */
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00111101" // /* MW 3 */
+ 7190 "01010000" // /* MW 2 */
+ 7191 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010010" // /* MW 3 */
+ 7194 "00000000" // /* MW 2 */
+ 7195 "00011010" // /* MW 1 */
+ 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7197 "00000000" // /* MW 1 */
+ 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 7199 "00000000" // /* MW 5 */
+ 7200 "00000000" // /* MW 4 */
+ 7201 "01111000" // /* MW 3 */
+ 7202 "00001100" // /* MW 2 */
+ 7203 "00000000" // /* MW 1 */
+.delay_slot
+ 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7205 "01100101" // /* MW 3 */
+ 7206 "11111010" // /* MW 2 */
+ 7207 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7209 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.delay_slot
+ 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7211 "00010010" // /* MW 3 */
+ 7212 "00000000" // /* MW 2 */
+ 7213 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7215 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 1108 103 first
+.delay_slot
+ 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7217 "00000000" // /* MW 15 */
+ 7218 "00000000" // /* MW 14 */
+ 7219 "01111000" // /* MW 13 */
+ 7220 "10100101" // /* MW 12 */
+ 7221 "00000001" // /* MW 11 */
+ 7222 "00000000" // /* MW 10 */
+ 7223 "00000000" // /* MW 9 */
+ 7224 "10000000" // /* MW 8 */
+ 7225 "00010010" // /* MW 7 */
+ 7226 "00000101" // /* MW 6 */
+ 7227 "00100001" // /* MW 5 */
+ 7228 "00000000" // /* MW 4 */
+ 7229 "11110000" // /* MW 3 */
+ 7230 "00101100" // /* MW 2 */
+ 7231 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+.src_ref 5 "blend.hpp" 163 48
+ 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */
+ 7233 "00100000" // /* MW 9 */
+ 7234 "00000000" // /* MW 8 */
+ 7235 "00000000" // /* MW 7 */
+ 7236 "10111110" // /* MW 6 */
+ 7237 "00000010" // /* MW 5 */
+ 7238 "00000000" // /* MW 4 */
+ 7239 "00000000" // /* MW 3 */
+ 7240 "00010100" // /* MW 2 */
+ 7241 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7243 "00000001" // /* MW 3 */
+ 7244 "00101010" // /* MW 2 */
+ 7245 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7247 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7253 "10000001" // /* MW 11 */
+ 7254 "10101101" // /* MW 10 */
+ 7255 "00000000" // /* MW 9 */
+ 7256 "00000000" // /* MW 8 */
+ 7257 "00000000" // /* MW 7 */
+ 7258 "00000000" // /* MW 6 */
+ 7259 "00100000" // /* MW 5 */
+ 7260 "00000000" // /* MW 4 */
+ 7261 "11110000" // /* MW 3 */
+ 7262 "00101100" // /* MW 2 */
+ 7263 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7265 "00000000" // /* MW 5 */
+ 7266 "00000000" // /* MW 4 */
+ 7267 "10010000" // /* MW 3 */
+ 7268 "00001110" // /* MW 2 */
+ 7269 "00000000" // /* MW 1 */
+.delay_slot
+ 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7271 "01110000" // /* MW 7 */
+ 7272 "11110000" // /* MW 6 */
+ 7273 "01100000" // /* MW 5 */
+ 7274 "00000010" // /* MW 4 */
+ 7275 "10110000" // /* MW 3 */
+ 7276 "10010011" // /* MW 2 */
+ 7277 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7279 "00000000" // /* MW 1 */
+.delay_slot
+ 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7281 "00110011" // /* MW 3 */
+ 7282 "11110000" // /* MW 2 */
+ 7283 "00001111" // /* MW 1 */
+.delay_slot
+ 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7285 "00110011" // /* MW 3 */
+ 7286 "11110101" // /* MW 2 */
+ 7287 "00001111" // /* MW 1 */
+.delay_slot
+ 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7289 "01110000" // /* MW 7 */
+ 7290 "10100101" // /* MW 6 */
+ 7291 "00000001" // /* MW 5 */
+ 7292 "00000000" // /* MW 4 */
+ 7293 "01100000" // /* MW 3 */
+ 7294 "00001110" // /* MW 2 */
+ 7295 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7297 "00000101" // /* MW 3 */
+ 7298 "00100010" // /* MW 2 */
+ 7299 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7301 "01100111" // /* MW 3 */
+ 7302 "01100010" // /* MW 2 */
+ 7303 "00010100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */
+ 7305 "00000001" // /* MW 5 */
+ 7306 "01000000" // /* MW 4 */
+ 7307 "10010000" // /* MW 3 */
+ 7308 "00001110" // /* MW 2 */
+ 7309 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+.delay_slot
+ 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7313 "00110011" // /* MW 3 */
+ 7314 "11110000" // /* MW 2 */
+ 7315 "00001111" // /* MW 1 */
+.delay_slot
+ 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7317 "00110011" // /* MW 3 */
+ 7318 "11110101" // /* MW 2 */
+ 7319 "00001111" // /* MW 1 */
+.delay_slot
+ 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "01110011" // /* MW 3 */
+ 7322 "11111000" // /* MW 2 */
+ 7323 "00001111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7325 "01111001" // /* MW 9 */
+ 7326 "11110000" // /* MW 8 */
+ 7327 "01100000" // /* MW 7 */
+ 7328 "01001010" // /* MW 6 */
+ 7329 "01110000" // /* MW 5 */
+ 7330 "00000000" // /* MW 4 */
+ 7331 "10110000" // /* MW 3 */
+ 7332 "10010011" // /* MW 2 */
+ 7333 "11111111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7335 "01100111" // /* MW 3 */
+ 7336 "11001110" // /* MW 2 */
+ 7337 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7339 "00000001" // /* MW 5 */
+ 7340 "01000000" // /* MW 4 */
+ 7341 "10000000" // /* MW 3 */
+ 7342 "00001110" // /* MW 2 */
+ 7343 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7345 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7347 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7349 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7351 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7353 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7355 "01100111" // /* MW 3 */
+ 7356 "01001110" // /* MW 2 */
+ 7357 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */
+ 7359 "00000001" // /* MW 5 */
+ 7360 "01000000" // /* MW 4 */
+ 7361 "01110000" // /* MW 3 */
+ 7362 "00001110" // /* MW 2 */
+ 7363 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7365 "01000001" // /* MW 3 */
+ 7366 "00001010" // /* MW 2 */
+ 7367 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7369 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7371 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7375 "00000000" // /* MW 1 */
+ 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */
+ 7377 "00000000" // /* MW 5 */
+ 7378 "00000000" // /* MW 4 */
+ 7379 "11011000" // /* MW 3 */
+ 7380 "00001100" // /* MW 2 */
+ 7381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7391 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+.src_ref 5 "blend.hpp" 170 36
+ 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7393 "00100000" // /* MW 9 */
+ 7394 "00000000" // /* MW 8 */
+ 7395 "00000000" // /* MW 7 */
+ 7396 "10111000" // /* MW 6 */
+ 7397 "00000010" // /* MW 5 */
+ 7398 "00000000" // /* MW 4 */
+ 7399 "00000000" // /* MW 3 */
+ 7400 "00110001" // /* MW 2 */
+ 7401 "00100000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7403 "01000001" // /* MW 5 */
+ 7404 "00000000" // /* MW 4 */
+ 7405 "00101000" // /* MW 3 */
+ 7406 "01000000" // /* MW 2 */
+ 7407 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00000001" // /* MW 3 */
+ 7410 "00101000" // /* MW 2 */
+ 7411 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7417 "00011100" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00000000" // /* MW 5 */
+ 7420 "00000100" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7425 "00000000" // /* MW 5 */
+ 7426 "00000000" // /* MW 4 */
+ 7427 "10101000" // /* MW 3 */
+ 7428 "00001100" // /* MW 2 */
+ 7429 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7431 "11111110" // /* MW 5 */
+ 7432 "10111111" // /* MW 4 */
+ 7433 "11111000" // /* MW 3 */
+ 7434 "00000000" // /* MW 2 */
+ 7435 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7437 "00100000" // /* MW 3 */
+ 7438 "00000000" // /* MW 2 */
+ 7439 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7445 "10000001" // /* MW 11 */
+ 7446 "10101101" // /* MW 10 */
+ 7447 "00000000" // /* MW 9 */
+ 7448 "00000000" // /* MW 8 */
+ 7449 "00000000" // /* MW 7 */
+ 7450 "00000000" // /* MW 6 */
+ 7451 "00100000" // /* MW 5 */
+ 7452 "00000000" // /* MW 4 */
+ 7453 "11110000" // /* MW 3 */
+ 7454 "00101100" // /* MW 2 */
+ 7455 "00000000" // /* MW 1 */
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7457 "00100000" // /* MW 9 */
+ 7458 "00000000" // /* MW 8 */
+ 7459 "00000000" // /* MW 7 */
+ 7460 "10111000" // /* MW 6 */
+ 7461 "00000010" // /* MW 5 */
+ 7462 "00000000" // /* MW 4 */
+ 7463 "01110000" // /* MW 3 */
+ 7464 "00000111" // /* MW 2 */
+ 7465 "11111110" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7467 "01011000" // /* MW 9 */
+ 7468 "00000000" // /* MW 8 */
+ 7469 "10001000" // /* MW 7 */
+ 7470 "10001010" // /* MW 6 */
+ 7471 "00000000" // /* MW 5 */
+ 7472 "00000000" // /* MW 4 */
+ 7473 "01110000" // /* MW 3 */
+ 7474 "10100111" // /* MW 2 */
+ 7475 "11111110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7477 "10010000" // /* MW 9 */
+ 7478 "11111111" // /* MW 8 */
+ 7479 "00001111" // /* MW 7 */
+ 7480 "00111110" // /* MW 6 */
+ 7481 "00000000" // /* MW 5 */
+ 7482 "00000000" // /* MW 4 */
+ 7483 "00100000" // /* MW 3 */
+ 7484 "10010011" // /* MW 2 */
+ 7485 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7487 "10000001" // /* MW 5 */
+ 7488 "00000000" // /* MW 4 */
+ 7489 "00101000" // /* MW 3 */
+ 7490 "01000000" // /* MW 2 */
+ 7491 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7493 "00000101" // /* MW 3 */
+ 7494 "00100010" // /* MW 2 */
+ 7495 "00010001" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7497 "00011100" // /* MW 7 */
+ 7498 "00000000" // /* MW 6 */
+ 7499 "00000000" // /* MW 5 */
+ 7500 "00000100" // /* MW 4 */
+ 7501 "01110000" // /* MW 3 */
+ 7502 "00001111" // /* MW 2 */
+ 7503 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7505 "00000000" // /* MW 5 */
+ 7506 "00000000" // /* MW 4 */
+ 7507 "10101000" // /* MW 3 */
+ 7508 "00001100" // /* MW 2 */
+ 7509 "00000000" // /* MW 1 */
+.delay_slot
+ 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7511 "11100000" // /* MW 3 */
+ 7512 "11000001" // /* MW 2 */
+ 7513 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7515 "11111110" // /* MW 5 */
+ 7516 "10111111" // /* MW 4 */
+ 7517 "11111000" // /* MW 3 */
+ 7518 "00000000" // /* MW 2 */
+ 7519 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7521 "00100000" // /* MW 3 */
+ 7522 "00000000" // /* MW 2 */
+ 7523 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+ 7527 "00000000" // /* MW 1 */
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 472
+.src_ref 8 "superkernels.cpp" 472 first
+.function_start
+ 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7537 "00000001" // /* MW 5 */
+ 7538 "00000000" // /* MW 4 */
+ 7539 "00000000" // /* MW 3 */
+ 7540 "00010000" // /* MW 2 */
+ 7541 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7543 "00010001" // /* MW 9 */
+ 7544 "01100000" // /* MW 8 */
+ 7545 "10110010" // /* MW 7 */
+ 7546 "11110011" // /* MW 6 */
+ 7547 "00000001" // /* MW 5 */
+ 7548 "00000000" // /* MW 4 */
+ 7549 "10110000" // /* MW 3 */
+ 7550 "11110011" // /* MW 2 */
+ 7551 "11111101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7553 "01110010" // /* MW 9 */
+ 7554 "01110000" // /* MW 8 */
+ 7555 "00001101" // /* MW 7 */
+ 7556 "10000010" // /* MW 6 */
+ 7557 "00011101" // /* MW 5 */
+ 7558 "11100111" // /* MW 4 */
+ 7559 "11010111" // /* MW 3 */
+ 7560 "11000010" // /* MW 2 */
+ 7561 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 22 first
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7563 "01111001" // /* MW 9 */
+ 7564 "11110000" // /* MW 8 */
+ 7565 "01101000" // /* MW 7 */
+ 7566 "10000001" // /* MW 6 */
+ 7567 "00000100" // /* MW 5 */
+ 7568 "00100001" // /* MW 4 */
+ 7569 "10110000" // /* MW 3 */
+ 7570 "00101110" // /* MW 2 */
+ 7571 "11111111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 30
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7573 "11110110" // /* MW 5 */
+ 7574 "01000111" // /* MW 4 */
+ 7575 "10111000" // /* MW 3 */
+ 7576 "00111110" // /* MW 2 */
+ 7577 "11111110" // /* MW 1 */
+ 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7579 "10110101" // /* MW 3 */
+ 7580 "11101001" // /* MW 2 */
+ 7581 "00001111" // /* MW 1 */
+ 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7583 "00000000" // /* MW 1 */
+ 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7585 "00000000" // /* MW 1 */
+ 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7587 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.src_ref 8 "superkernels.cpp" 477 16 first
+ 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */
+ 7589 "00000001" // /* MW 5 */
+ 7590 "01000000" // /* MW 4 */
+ 7591 "11110000" // /* MW 3 */
+ 7592 "00001111" // /* MW 2 */
+ 7593 "10000000" // /* MW 1 */
+.delay_slot
+ 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7595 "10010101" // /* MW 3 */
+ 7596 "11111101" // /* MW 2 */
+ 7597 "00001111" // /* MW 1 */
+.delay_slot
+ 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7599 "11010101" // /* MW 3 */
+ 7600 "11110101" // /* MW 2 */
+ 7601 "00001111" // /* MW 1 */
+.delay_slot
+ 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7603 "00011101" // /* MW 3 */
+ 7604 "11100000" // /* MW 2 */
+ 7605 "00001111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11
+.delay_slot
+ 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7607 "10010000" // /* MW 5 */
+ 7608 "11001001" // /* MW 4 */
+ 7609 "11001100" // /* MW 3 */
+ 7610 "00000111" // /* MW 2 */
+ 7611 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11 first
+.delay_slot
+ 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7613 "00110001" // /* MW 3 */
+ 7614 "00000110" // /* MW 2 */
+ 7615 "00001110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 5 "tile.hpp" 74 8
+.src_ref 5 "tile.hpp" 74 8
+ 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7617 "00010000" // /* MW 11 */
+ 7618 "01110110" // /* MW 10 */
+ 7619 "00110010" // /* MW 9 */
+ 7620 "11110001" // /* MW 8 */
+ 7621 "00000001" // /* MW 7 */
+ 7622 "00000000" // /* MW 6 */
+ 7623 "10001011" // /* MW 5 */
+ 7624 "10001000" // /* MW 4 */
+ 7625 "00000111" // /* MW 3 */
+ 7626 "00110001" // /* MW 2 */
+ 7627 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 74 8 first
+.src_ref 5 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7629 "00010001" // /* MW 9 */
+ 7630 "01111000" // /* MW 8 */
+ 7631 "00110010" // /* MW 7 */
+ 7632 "11110001" // /* MW 6 */
+ 7633 "00000001" // /* MW 5 */
+ 7634 "00000000" // /* MW 4 */
+ 7635 "00110000" // /* MW 3 */
+ 7636 "11000110" // /* MW 2 */
+ 7637 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+.src_ref 5 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7639 "10000001" // /* MW 5 */
+ 7640 "11000101" // /* MW 4 */
+ 7641 "11101100" // /* MW 3 */
+ 7642 "11000000" // /* MW 2 */
+ 7643 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */
+ 7645 "00000001" // /* MW 5 */
+ 7646 "00000000" // /* MW 4 */
+ 7647 "00001000" // /* MW 3 */
+ 7648 "00000101" // /* MW 2 */
+ 7649 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7651 "10000000" // /* MW 5 */
+ 7652 "11001000" // /* MW 4 */
+ 7653 "11000000" // /* MW 3 */
+ 7654 "00000111" // /* MW 2 */
+ 7655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7659 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7661 "00110001" // /* MW 3 */
+ 7662 "00100000" // /* MW 2 */
+ 7663 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7665 "00000000" // /* MW 15 */
+ 7666 "00000000" // /* MW 14 */
+ 7667 "01111000" // /* MW 13 */
+ 7668 "10100101" // /* MW 12 */
+ 7669 "00000001" // /* MW 11 */
+ 7670 "00000000" // /* MW 10 */
+ 7671 "00000000" // /* MW 9 */
+ 7672 "00000000" // /* MW 8 */
+ 7673 "01011011" // /* MW 7 */
+ 7674 "00000001" // /* MW 6 */
+ 7675 "00100000" // /* MW 5 */
+ 7676 "00000000" // /* MW 4 */
+ 7677 "11110000" // /* MW 3 */
+ 7678 "00101100" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 51
+.src_ref 8 "superkernels.cpp" 487 47
+.return_address
+ 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7681 "00010000" // /* MW 9 */
+ 7682 "00100000" // /* MW 8 */
+ 7683 "00110010" // /* MW 7 */
+ 7684 "11110001" // /* MW 6 */
+ 7685 "00000001" // /* MW 5 */
+ 7686 "00000000" // /* MW 4 */
+ 7687 "00000000" // /* MW 3 */
+ 7688 "00010001" // /* MW 2 */
+ 7689 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 51 first
+ 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7691 "00010000" // /* MW 9 */
+ 7692 "01100100" // /* MW 8 */
+ 7693 "00110010" // /* MW 7 */
+ 7694 "11110001" // /* MW 6 */
+ 7695 "00000001" // /* MW 5 */
+ 7696 "00000000" // /* MW 4 */
+ 7697 "11010000" // /* MW 3 */
+ 7698 "10111010" // /* MW 2 */
+ 7699 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 85
+ 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7701 "00010000" // /* MW 9 */
+ 7702 "00100010" // /* MW 8 */
+ 7703 "00110010" // /* MW 7 */
+ 7704 "11110001" // /* MW 6 */
+ 7705 "00000001" // /* MW 5 */
+ 7706 "00000000" // /* MW 4 */
+ 7707 "11010000" // /* MW 3 */
+ 7708 "11001010" // /* MW 2 */
+ 7709 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 85
+.src_ref 8 "superkernels.cpp" 482 16
+ 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7711 "00010000" // /* MW 9 */
+ 7712 "01101000" // /* MW 8 */
+ 7713 "10110010" // /* MW 7 */
+ 7714 "11110001" // /* MW 6 */
+ 7715 "00000001" // /* MW 5 */
+ 7716 "00000000" // /* MW 4 */
+ 7717 "11010000" // /* MW 3 */
+ 7718 "10110110" // /* MW 2 */
+ 7719 "01000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+.src_ref 8 "superkernels.cpp" 482 40 first
+ 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7721 "00010000" // /* MW 9 */
+ 7722 "01100110" // /* MW 8 */
+ 7723 "10110010" // /* MW 7 */
+ 7724 "11110000" // /* MW 6 */
+ 7725 "00000001" // /* MW 5 */
+ 7726 "00000000" // /* MW 4 */
+ 7727 "11010000" // /* MW 3 */
+ 7728 "10000101" // /* MW 2 */
+ 7729 "01000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 120 first
+.src_ref 8 "superkernels.cpp" 483 44
+ 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7731 "10000001" // /* MW 5 */
+ 7732 "00111001" // /* MW 4 */
+ 7733 "11011000" // /* MW 3 */
+ 7734 "10111110" // /* MW 2 */
+ 7735 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+ 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7737 "00010100" // /* MW 3 */
+ 7738 "01101000" // /* MW 2 */
+ 7739 "00011010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7741 "00000000" // /* MW 5 */
+ 7742 "11001010" // /* MW 4 */
+ 7743 "11001100" // /* MW 3 */
+ 7744 "00000111" // /* MW 2 */
+ 7745 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13
+ 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7747 "11010000" // /* MW 5 */
+ 7748 "11001001" // /* MW 4 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "00000111" // /* MW 2 */
+ 7751 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 27
+ 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7753 "00101111" // /* MW 3 */
+ 7754 "10100101" // /* MW 2 */
+ 7755 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7757 "00000000" // /* MW 5 */
+ 7758 "00100000" // /* MW 4 */
+ 7759 "00001000" // /* MW 3 */
+ 7760 "00000000" // /* MW 2 */
+ 7761 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 61
+.src_ref 8 "superkernels.cpp" 482 16 first
+ 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7763 "01011111" // /* MW 5 */
+ 7764 "11001010" // /* MW 4 */
+ 7765 "00110110" // /* MW 3 */
+ 7766 "10000101" // /* MW 2 */
+ 7767 "01100000" // /* MW 1 */
+ 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7769 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 96 first
+ 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7771 "00101111" // /* MW 3 */
+ 7772 "11100101" // /* MW 2 */
+ 7773 "00010011" // /* MW 1 */
+ 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7775 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+ 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7777 "01010001" // /* MW 3 */
+ 7778 "00000110" // /* MW 2 */
+ 7779 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 15 first
+ 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7781 "00101110" // /* MW 3 */
+ 7782 "01001100" // /* MW 2 */
+ 7783 "00000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7785 "00110001" // /* MW 3 */
+ 7786 "00011110" // /* MW 2 */
+ 7787 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7789 "00110001" // /* MW 3 */
+ 7790 "00011110" // /* MW 2 */
+ 7791 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7793 "00110001" // /* MW 3 */
+ 7794 "00011110" // /* MW 2 */
+ 7795 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7797 "00110001" // /* MW 3 */
+ 7798 "00011110" // /* MW 2 */
+ 7799 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7801 "00110001" // /* MW 3 */
+ 7802 "00011110" // /* MW 2 */
+ 7803 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7805 "00110001" // /* MW 3 */
+ 7806 "00011110" // /* MW 2 */
+ 7807 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13 first
+ 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7809 "00101001" // /* MW 3 */
+ 7810 "00000100" // /* MW 2 */
+ 7811 "00001000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7813 "00110001" // /* MW 3 */
+ 7814 "00011110" // /* MW 2 */
+ 7815 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7817 "00110001" // /* MW 3 */
+ 7818 "00011110" // /* MW 2 */
+ 7819 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7821 "00110001" // /* MW 3 */
+ 7822 "00011110" // /* MW 2 */
+ 7823 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40 first
+ 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7825 "00110110" // /* MW 3 */
+ 7826 "11011100" // /* MW 2 */
+ 7827 "00000010" // /* MW 1 */
+ 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7829 "00000000" // /* MW 1 */
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+ 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7837 "00000000" // /* MW 1 */
+ 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7839 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7841 "00001011" // /* MW 3 */
+ 7842 "01100011" // /* MW 2 */
+ 7843 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */
+ 7845 "00000001" // /* MW 5 */
+ 7846 "01000000" // /* MW 4 */
+ 7847 "01111000" // /* MW 3 */
+ 7848 "00001111" // /* MW 2 */
+ 7849 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7851 "11000000" // /* MW 3 */
+ 7852 "00011110" // /* MW 2 */
+ 7853 "00011011" // /* MW 1 */
+.delay_slot
+ 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7855 "00011101" // /* MW 3 */
+ 7856 "11011001" // /* MW 2 */
+ 7857 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7863 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7865 "00000001" // /* MW 5 */
+ 7866 "00000000" // /* MW 4 */
+ 7867 "01010000" // /* MW 3 */
+ 7868 "00010101" // /* MW 2 */
+ 7869 "00000000" // /* MW 1 */
+.delay_slot
+ 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7871 "10010101" // /* MW 3 */
+ 7872 "11011101" // /* MW 2 */
+ 7873 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7881 "00011100" // /* MW 7 */
+ 7882 "00000000" // /* MW 6 */
+ 7883 "00000000" // /* MW 5 */
+ 7884 "00000100" // /* MW 4 */
+ 7885 "11110000" // /* MW 3 */
+ 7886 "00101100" // /* MW 2 */
+ 7887 "00000000" // /* MW 1 */
+.return_address
+ 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */
+ 7889 "00000000" // /* MW 5 */
+ 7890 "00000000" // /* MW 4 */
+ 7891 "10011000" // /* MW 3 */
+ 7892 "00001111" // /* MW 2 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7895 "11100000" // /* MW 5 */
+ 7896 "11001001" // /* MW 4 */
+ 7897 "11001110" // /* MW 3 */
+ 7898 "00000111" // /* MW 2 */
+ 7899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7907 "00011100" // /* MW 13 */
+ 7908 "00000000" // /* MW 12 */
+ 7909 "00000000" // /* MW 11 */
+ 7910 "01010111" // /* MW 10 */
+ 7911 "00011010" // /* MW 9 */
+ 7912 "01000000" // /* MW 8 */
+ 7913 "00000000" // /* MW 7 */
+ 7914 "00000000" // /* MW 6 */
+ 7915 "10110110" // /* MW 5 */
+ 7916 "00000010" // /* MW 4 */
+ 7917 "11110000" // /* MW 3 */
+ 7918 "00101100" // /* MW 2 */
+ 7919 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7921 "00000001" // /* MW 5 */
+ 7922 "00000000" // /* MW 4 */
+ 7923 "01010000" // /* MW 3 */
+ 7924 "00010101" // /* MW 2 */
+ 7925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7933 "01100111" // /* MW 3 */
+ 7934 "00000001" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7937 "00000000" // /* MW 15 */
+ 7938 "00000000" // /* MW 14 */
+ 7939 "01111000" // /* MW 13 */
+ 7940 "10100101" // /* MW 12 */
+ 7941 "00000001" // /* MW 11 */
+ 7942 "00001100" // /* MW 10 */
+ 7943 "00011000" // /* MW 9 */
+ 7944 "00000010" // /* MW 8 */
+ 7945 "01011011" // /* MW 7 */
+ 7946 "00000001" // /* MW 6 */
+ 7947 "00100000" // /* MW 5 */
+ 7948 "00000000" // /* MW 4 */
+ 7949 "11110000" // /* MW 3 */
+ 7950 "00101100" // /* MW 2 */
+ 7951 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+.no_stack_arguments
+ 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */
+ 7953 "00000001" // /* MW 5 */
+ 7954 "00000000" // /* MW 4 */
+ 7955 "01000000" // /* MW 3 */
+ 7956 "00011000" // /* MW 2 */
+ 7957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7959 "00100000" // /* MW 3 */
+ 7960 "01010000" // /* MW 2 */
+ 7961 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7963 "11100000" // /* MW 5 */
+ 7964 "11001001" // /* MW 4 */
+ 7965 "11001110" // /* MW 3 */
+ 7966 "00000111" // /* MW 2 */
+ 7967 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7969 "00000000" // /* MW 5 */
+ 7970 "00100000" // /* MW 4 */
+ 7971 "00000001" // /* MW 3 */
+ 7972 "00000000" // /* MW 2 */
+ 7973 "01001111" // /* MW 1 */
+.delay_slot
+ 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "10010101" // /* MW 3 */
+ 7976 "11011101" // /* MW 2 */
+ 7977 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7979 "00100000" // /* MW 5 */
+ 7980 "00000000" // /* MW 4 */
+ 7981 "11110000" // /* MW 3 */
+ 7982 "00101100" // /* MW 2 */
+ 7983 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+ 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7985 "10111000" // /* MW 9 */
+ 7986 "00001000" // /* MW 8 */
+ 7987 "00000000" // /* MW 7 */
+ 7988 "00000000" // /* MW 6 */
+ 7989 "11010010" // /* MW 5 */
+ 7990 "00000010" // /* MW 4 */
+ 7991 "01010000" // /* MW 3 */
+ 7992 "11000000" // /* MW 2 */
+ 7993 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 492 38
+.src_ref 8 "superkernels.cpp" 492 38
+ 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7995 "01111000" // /* MW 9 */
+ 7996 "01001001" // /* MW 8 */
+ 7997 "00000000" // /* MW 7 */
+ 7998 "00001000" // /* MW 6 */
+ 7999 "10000000" // /* MW 5 */
+ 8000 "00000001" // /* MW 4 */
+ 8001 "10000000" // /* MW 3 */
+ 8002 "01000000" // /* MW 2 */
+ 8003 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+.src_ref 8 "superkernels.cpp" 498 15
+ 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8005 "00010000" // /* MW 9 */
+ 8006 "01101010" // /* MW 8 */
+ 8007 "10110010" // /* MW 7 */
+ 8008 "11110001" // /* MW 6 */
+ 8009 "00000001" // /* MW 5 */
+ 8010 "00000000" // /* MW 4 */
+ 8011 "00100000" // /* MW 3 */
+ 8012 "00100011" // /* MW 2 */
+ 8013 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8015 "10110000" // /* MW 5 */
+ 8016 "11001001" // /* MW 4 */
+ 8017 "11000010" // /* MW 3 */
+ 8018 "00000111" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8021 "10100000" // /* MW 5 */
+ 8022 "11001001" // /* MW 4 */
+ 8023 "11001110" // /* MW 3 */
+ 8024 "00000111" // /* MW 2 */
+ 8025 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8027 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 38
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8029 "00010111" // /* MW 3 */
+ 8030 "00011110" // /* MW 2 */
+ 8031 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8033 "10000000" // /* MW 3 */
+ 8034 "00111010" // /* MW 2 */
+ 8035 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8037 "00010110" // /* MW 3 */
+ 8038 "01000000" // /* MW 2 */
+ 8039 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8041 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8043 "00000001" // /* MW 3 */
+ 8044 "00000001" // /* MW 2 */
+ 8045 "00011100" // /* MW 1 */
+ 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8047 "00000000" // /* MW 1 */
+ 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8049 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 492 38 first
+ 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8051 "00000111" // /* MW 3 */
+ 8052 "00001011" // /* MW 2 */
+ 8053 "00000110" // /* MW 1 */
+ 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8055 "00000000" // /* MW 1 */
+ 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8057 "00000000" // /* MW 1 */
+ 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8059 "00000000" // /* MW 1 */
+ 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8061 "00000000" // /* MW 1 */
+ 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8063 "00000000" // /* MW 1 */
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 494 25 first
+ 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8067 "11010001" // /* MW 3 */
+ 8068 "00011101" // /* MW 2 */
+ 8069 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 495 24 first
+ 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8071 "11110001" // /* MW 3 */
+ 8072 "00000101" // /* MW 2 */
+ 8073 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 496 24 first
+ 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8075 "10110001" // /* MW 3 */
+ 8076 "00010101" // /* MW 2 */
+ 8077 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 15 first
+ 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8079 "00101110" // /* MW 3 */
+ 8080 "00011100" // /* MW 2 */
+ 8081 "00000010" // /* MW 1 */
+ 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8083 "00000000" // /* MW 1 */
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+ 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8087 "00000000" // /* MW 1 */
+ 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8089 "00000000" // /* MW 1 */
+ 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8091 "00000000" // /* MW 1 */
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+ 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "00101001" // /* MW 3 */
+ 8096 "00000100" // /* MW 2 */
+ 8097 "00001011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 16 first
+ 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8099 "00101110" // /* MW 3 */
+ 8100 "00000100" // /* MW 2 */
+ 8101 "00000010" // /* MW 1 */
+ 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8103 "00000000" // /* MW 1 */
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8105 "00000000" // /* MW 1 */
+ 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8107 "00000000" // /* MW 1 */
+ 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8109 "00000000" // /* MW 1 */
+ 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8111 "00000000" // /* MW 1 */
+ 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8113 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8115 "00101001" // /* MW 3 */
+ 8116 "00000100" // /* MW 2 */
+ 8117 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 15 first
+ 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8119 "00101110" // /* MW 3 */
+ 8120 "00010100" // /* MW 2 */
+ 8121 "00000010" // /* MW 1 */
+ 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8123 "00000000" // /* MW 1 */
+ 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */
+ 8125 "00000000" // /* MW 5 */
+ 8126 "00000000" // /* MW 4 */
+ 8127 "11111000" // /* MW 3 */
+ 8128 "00001111" // /* MW 2 */
+ 8129 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8131 "10111000" // /* MW 5 */
+ 8132 "11001001" // /* MW 4 */
+ 8133 "11000000" // /* MW 3 */
+ 8134 "00000111" // /* MW 2 */
+ 8135 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8141 "01100111" // /* MW 3 */
+ 8142 "00000001" // /* MW 2 */
+ 8143 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8145 "00000000" // /* MW 15 */
+ 8146 "00000000" // /* MW 14 */
+ 8147 "01111000" // /* MW 13 */
+ 8148 "10100101" // /* MW 12 */
+ 8149 "00000001" // /* MW 11 */
+ 8150 "00000000" // /* MW 10 */
+ 8151 "00000000" // /* MW 9 */
+ 8152 "10000000" // /* MW 8 */
+ 8153 "00101001" // /* MW 7 */
+ 8154 "00000100" // /* MW 6 */
+ 8155 "00100000" // /* MW 5 */
+ 8156 "00000000" // /* MW 4 */
+ 8157 "11110000" // /* MW 3 */
+ 8158 "00101100" // /* MW 2 */
+ 8159 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8161 "00010001" // /* MW 9 */
+ 8162 "01101000" // /* MW 8 */
+ 8163 "10110010" // /* MW 7 */
+ 8164 "11110011" // /* MW 6 */
+ 8165 "00000001" // /* MW 5 */
+ 8166 "00000000" // /* MW 4 */
+ 8167 "10110000" // /* MW 3 */
+ 8168 "10100011" // /* MW 2 */
+ 8169 "11111011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8171 "10000001" // /* MW 5 */
+ 8172 "00101001" // /* MW 4 */
+ 8173 "11110110" // /* MW 3 */
+ 8174 "00101100" // /* MW 2 */
+ 8175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+.src_ref 8 "superkernels.cpp" 505 7 first
+.src_ref 8 "superkernels.cpp" 505 19
+ 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8177 "00001010" // /* MW 5 */
+ 8178 "01000100" // /* MW 4 */
+ 8179 "11010000" // /* MW 3 */
+ 8180 "11000010" // /* MW 2 */
+ 8181 "11100000" // /* MW 1 */
+ 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8183 "00000000" // /* MW 1 */
+ 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8185 "00000000" // /* MW 1 */
+ 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8187 "00000000" // /* MW 1 */
+ 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8189 "00000000" // /* MW 1 */
+ 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8191 "00000000" // /* MW 1 */
+ 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8193 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 19
+ 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8195 "00001000" // /* MW 3 */
+ 8196 "01100011" // /* MW 2 */
+ 8197 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 25
+ 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */
+ 8199 "00000001" // /* MW 5 */
+ 8200 "01000000" // /* MW 4 */
+ 8201 "01011000" // /* MW 3 */
+ 8202 "00010000" // /* MW 2 */
+ 8203 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.delay_slot
+ 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8205 "00000110" // /* MW 3 */
+ 8206 "01100110" // /* MW 2 */
+ 8207 "00011110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8211 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8213 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8215 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29
+ 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8217 "10001000" // /* MW 5 */
+ 8218 "11001001" // /* MW 4 */
+ 8219 "11000100" // /* MW 3 */
+ 8220 "00000111" // /* MW 2 */
+ 8221 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29 first
+.src_ref 8 "superkernels.cpp" 505 65
+ 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8223 "00010000" // /* MW 9 */
+ 8224 "00110000" // /* MW 8 */
+ 8225 "00110010" // /* MW 7 */
+ 8226 "11110001" // /* MW 6 */
+ 8227 "00000001" // /* MW 5 */
+ 8228 "00000000" // /* MW 4 */
+ 8229 "11010000" // /* MW 3 */
+ 8230 "11000010" // /* MW 2 */
+ 8231 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 65
+ 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8233 "00111010" // /* MW 3 */
+ 8234 "00000100" // /* MW 2 */
+ 8235 "00000010" // /* MW 1 */
+ 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8237 "00000000" // /* MW 1 */
+ 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8239 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.no_stack_arguments
+ 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8241 "00000001" // /* MW 5 */
+ 8242 "00000000" // /* MW 4 */
+ 8243 "11111000" // /* MW 3 */
+ 8244 "00010011" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8247 "00000001" // /* MW 3 */
+ 8248 "00011010" // /* MW 2 */
+ 8249 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8251 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8253 "11011010" // /* MW 3 */
+ 8254 "00110110" // /* MW 2 */
+ 8255 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8257 "01000001" // /* MW 5 */
+ 8258 "10111011" // /* MW 4 */
+ 8259 "00110111" // /* MW 3 */
+ 8260 "01100000" // /* MW 2 */
+ 8261 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8263 "00010010" // /* MW 9 */
+ 8264 "00000001" // /* MW 8 */
+ 8265 "00000100" // /* MW 7 */
+ 8266 "00000000" // /* MW 6 */
+ 8267 "01011011" // /* MW 5 */
+ 8268 "00000001" // /* MW 4 */
+ 8269 "11110000" // /* MW 3 */
+ 8270 "00101100" // /* MW 2 */
+ 8271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.return_address
+ 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8273 "01000001" // /* MW 5 */
+ 8274 "10101111" // /* MW 4 */
+ 8275 "00111101" // /* MW 3 */
+ 8276 "00000110" // /* MW 2 */
+ 8277 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+ 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8279 "00000010" // /* MW 3 */
+ 8280 "11100001" // /* MW 2 */
+ 8281 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 6
+.src_ref 8 "superkernels.cpp" 505 76
+ 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */
+ 8283 "00000001" // /* MW 5 */
+ 8284 "01000000" // /* MW 4 */
+ 8285 "01010000" // /* MW 3 */
+ 8286 "00010000" // /* MW 2 */
+ 8287 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8299 "10000001" // /* MW 5 */
+ 8300 "11011001" // /* MW 4 */
+ 8301 "10100100" // /* MW 3 */
+ 8302 "00011111" // /* MW 2 */
+ 8303 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8305 "01110110" // /* MW 3 */
+ 8306 "11111111" // /* MW 2 */
+ 8307 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8309 "00110110" // /* MW 3 */
+ 8310 "11111110" // /* MW 2 */
+ 8311 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8313 "01010110" // /* MW 3 */
+ 8314 "11111110" // /* MW 2 */
+ 8315 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8317 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8319 "00110110" // /* MW 3 */
+ 8320 "01000110" // /* MW 2 */
+ 8321 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8325 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8327 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8329 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8331 "00010010" // /* MW 3 */
+ 8332 "10100011" // /* MW 2 */
+ 8333 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8335 "00110001" // /* MW 3 */
+ 8336 "00000110" // /* MW 2 */
+ 8337 "00001010" // /* MW 1 */
+ 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8339 "00000000" // /* MW 1 */
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+ 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8345 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8347 "00010000" // /* MW 5 */
+ 8348 "10100110" // /* MW 4 */
+ 8349 "11111000" // /* MW 3 */
+ 8350 "00101100" // /* MW 2 */
+ 8351 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8353 "00000000" // /* MW 1 */
+ 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8355 "00000000" // /* MW 1 */
+ 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8357 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 7 first
+ 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8359 "01111110" // /* MW 9 */
+ 8360 "10100101" // /* MW 8 */
+ 8361 "00000001" // /* MW 7 */
+ 8362 "00000000" // /* MW 6 */
+ 8363 "00010000" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11010000" // /* MW 3 */
+ 8366 "11000010" // /* MW 2 */
+ 8367 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+.src_ref 8 "superkernels.cpp" 508 19
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 8 "superkernels.cpp" 558 19
+ 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8369 "00001001" // /* MW 3 */
+ 8370 "00011100" // /* MW 2 */
+ 8371 "00010000" // /* MW 1 */
+ 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8373 "00000000" // /* MW 1 */
+ 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8375 "00000000" // /* MW 1 */
+ 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8377 "00000000" // /* MW 1 */
+ 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8379 "00000000" // /* MW 1 */
+ 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8381 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 19
+ 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8383 "00001000" // /* MW 3 */
+ 8384 "10100001" // /* MW 2 */
+ 8385 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 25
+ 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8387 "00000001" // /* MW 5 */
+ 8388 "01000000" // /* MW 4 */
+ 8389 "10110000" // /* MW 3 */
+ 8390 "00010000" // /* MW 2 */
+ 8391 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8401 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+ 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8403 "11000000" // /* MW 5 */
+ 8404 "11001001" // /* MW 4 */
+ 8405 "11000100" // /* MW 3 */
+ 8406 "00000111" // /* MW 2 */
+ 8407 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+.src_ref 8 "superkernels.cpp" 508 65
+ 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8409 "00010000" // /* MW 9 */
+ 8410 "00110000" // /* MW 8 */
+ 8411 "00110010" // /* MW 7 */
+ 8412 "11110001" // /* MW 6 */
+ 8413 "00000001" // /* MW 5 */
+ 8414 "00000000" // /* MW 4 */
+ 8415 "11010000" // /* MW 3 */
+ 8416 "11000010" // /* MW 2 */
+ 8417 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 65
+ 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8419 "00111010" // /* MW 3 */
+ 8420 "00000100" // /* MW 2 */
+ 8421 "00000010" // /* MW 1 */
+ 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8423 "00000000" // /* MW 1 */
+ 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8425 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.no_stack_arguments
+ 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8427 "00000001" // /* MW 5 */
+ 8428 "00000000" // /* MW 4 */
+ 8429 "11111000" // /* MW 3 */
+ 8430 "00010011" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8433 "00000001" // /* MW 3 */
+ 8434 "00011010" // /* MW 2 */
+ 8435 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8437 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8439 "11011010" // /* MW 3 */
+ 8440 "00110110" // /* MW 2 */
+ 8441 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8443 "01000001" // /* MW 5 */
+ 8444 "10111011" // /* MW 4 */
+ 8445 "00110111" // /* MW 3 */
+ 8446 "01100000" // /* MW 2 */
+ 8447 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "01111000" // /* MW 13 */
+ 8452 "10100101" // /* MW 12 */
+ 8453 "00000001" // /* MW 11 */
+ 8454 "10010000" // /* MW 10 */
+ 8455 "00001000" // /* MW 9 */
+ 8456 "00100000" // /* MW 8 */
+ 8457 "01011011" // /* MW 7 */
+ 8458 "00000001" // /* MW 6 */
+ 8459 "00100000" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.return_address
+ 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8465 "01000001" // /* MW 5 */
+ 8466 "10101111" // /* MW 4 */
+ 8467 "00111101" // /* MW 3 */
+ 8468 "00000110" // /* MW 2 */
+ 8469 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+ 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8471 "00000010" // /* MW 3 */
+ 8472 "11100001" // /* MW 2 */
+ 8473 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 6
+.src_ref 8 "superkernels.cpp" 508 76
+ 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8475 "00000001" // /* MW 5 */
+ 8476 "01000000" // /* MW 4 */
+ 8477 "10110000" // /* MW 3 */
+ 8478 "00010000" // /* MW 2 */
+ 8479 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8487 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8489 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8491 "10000001" // /* MW 5 */
+ 8492 "11011001" // /* MW 4 */
+ 8493 "10100100" // /* MW 3 */
+ 8494 "00011111" // /* MW 2 */
+ 8495 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8497 "01110110" // /* MW 3 */
+ 8498 "11111111" // /* MW 2 */
+ 8499 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8501 "00110110" // /* MW 3 */
+ 8502 "11111110" // /* MW 2 */
+ 8503 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "01010110" // /* MW 3 */
+ 8506 "11111110" // /* MW 2 */
+ 8507 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8511 "00110110" // /* MW 3 */
+ 8512 "01000110" // /* MW 2 */
+ 8513 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8515 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00010010" // /* MW 3 */
+ 8524 "10100011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8527 "00110001" // /* MW 3 */
+ 8528 "00000110" // /* MW 2 */
+ 8529 "00001010" // /* MW 1 */
+ 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8531 "00000000" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+ 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8537 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8539 "00010000" // /* MW 5 */
+ 8540 "10100110" // /* MW 4 */
+ 8541 "11111000" // /* MW 3 */
+ 8542 "00101100" // /* MW 2 */
+ 8543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8545 "00000000" // /* MW 1 */
+ 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8547 "00000000" // /* MW 1 */
+ 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8549 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 7 first
+.src_ref 8 "superkernels.cpp" 511 29
+ 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8551 "00010000" // /* MW 9 */
+ 8552 "01110010" // /* MW 8 */
+ 8553 "10110010" // /* MW 7 */
+ 8554 "11110011" // /* MW 6 */
+ 8555 "00000001" // /* MW 5 */
+ 8556 "00000000" // /* MW 4 */
+ 8557 "11010000" // /* MW 3 */
+ 8558 "11000010" // /* MW 2 */
+ 8559 "11100000" // /* MW 1 */
+ 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8561 "00000000" // /* MW 1 */
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+ 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8565 "00000000" // /* MW 1 */
+ 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8567 "00000000" // /* MW 1 */
+ 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8569 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8571 "00010001" // /* MW 3 */
+ 8572 "00100100" // /* MW 2 */
+ 8573 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8575 "00001000" // /* MW 3 */
+ 8576 "10100001" // /* MW 2 */
+ 8577 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 25
+ 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */
+ 8579 "00000001" // /* MW 5 */
+ 8580 "01000000" // /* MW 4 */
+ 8581 "00100000" // /* MW 3 */
+ 8582 "00010001" // /* MW 2 */
+ 8583 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+.delay_slot
+ 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8585 "11000000" // /* MW 5 */
+ 8586 "11001000" // /* MW 4 */
+ 8587 "11000100" // /* MW 3 */
+ 8588 "00000111" // /* MW 2 */
+ 8589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8591 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8593 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8595 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8597 "00000001" // /* MW 3 */
+ 8598 "00100010" // /* MW 2 */
+ 8599 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 29
+.src_ref 8 "superkernels.cpp" 511 42
+ 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8601 "00000010" // /* MW 5 */
+ 8602 "00110100" // /* MW 4 */
+ 8603 "11010000" // /* MW 3 */
+ 8604 "11000010" // /* MW 2 */
+ 8605 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+ 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8607 "00111010" // /* MW 3 */
+ 8608 "00000100" // /* MW 2 */
+ 8609 "00000010" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.no_stack_arguments
+ 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8615 "00000001" // /* MW 5 */
+ 8616 "00000000" // /* MW 4 */
+ 8617 "11111000" // /* MW 3 */
+ 8618 "00010011" // /* MW 2 */
+ 8619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8621 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8623 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8625 "00011010" // /* MW 3 */
+ 8626 "00110111" // /* MW 2 */
+ 8627 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8629 "01000001" // /* MW 5 */
+ 8630 "10111011" // /* MW 4 */
+ 8631 "00110111" // /* MW 3 */
+ 8632 "01100000" // /* MW 2 */
+ 8633 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8635 "00100100" // /* MW 5 */
+ 8636 "00000010" // /* MW 4 */
+ 8637 "11111000" // /* MW 3 */
+ 8638 "00101100" // /* MW 2 */
+ 8639 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.return_address
+ 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8641 "01000001" // /* MW 5 */
+ 8642 "10101111" // /* MW 4 */
+ 8643 "00111101" // /* MW 3 */
+ 8644 "00000110" // /* MW 2 */
+ 8645 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+ 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00000010" // /* MW 3 */
+ 8648 "11100001" // /* MW 2 */
+ 8649 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 6
+.src_ref 8 "superkernels.cpp" 511 77
+ 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */
+ 8651 "00000001" // /* MW 5 */
+ 8652 "01000000" // /* MW 4 */
+ 8653 "00010000" // /* MW 3 */
+ 8654 "00010001" // /* MW 2 */
+ 8655 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8667 "01011000" // /* MW 9 */
+ 8668 "00000001" // /* MW 8 */
+ 8669 "00001000" // /* MW 7 */
+ 8670 "11101010" // /* MW 6 */
+ 8671 "00010111" // /* MW 5 */
+ 8672 "00111111" // /* MW 4 */
+ 8673 "11010000" // /* MW 3 */
+ 8674 "11101110" // /* MW 2 */
+ 8675 "11011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8677 "01010110" // /* MW 3 */
+ 8678 "11111110" // /* MW 2 */
+ 8679 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8681 "01110110" // /* MW 3 */
+ 8682 "11111110" // /* MW 2 */
+ 8683 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8685 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8687 "01010110" // /* MW 3 */
+ 8688 "01000110" // /* MW 2 */
+ 8689 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8691 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8693 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8695 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8699 "00100010" // /* MW 3 */
+ 8700 "11100101" // /* MW 2 */
+ 8701 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "01010001" // /* MW 3 */
+ 8704 "00000110" // /* MW 2 */
+ 8705 "00001110" // /* MW 1 */
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+ 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8709 "00000000" // /* MW 1 */
+ 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8711 "00000000" // /* MW 5 */
+ 8712 "00000000" // /* MW 4 */
+ 8713 "00101000" // /* MW 3 */
+ 8714 "00010001" // /* MW 2 */
+ 8715 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8717 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+.delay_slot
+ 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8719 "00011000" // /* MW 3 */
+ 8720 "10010011" // /* MW 2 */
+ 8721 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8727 "01111110" // /* MW 9 */
+ 8728 "10100101" // /* MW 8 */
+ 8729 "00000001" // /* MW 7 */
+ 8730 "00000000" // /* MW 6 */
+ 8731 "00010000" // /* MW 5 */
+ 8732 "00000000" // /* MW 4 */
+ 8733 "11110000" // /* MW 3 */
+ 8734 "00101100" // /* MW 2 */
+ 8735 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8737 "00000000" // /* MW 5 */
+ 8738 "00000000" // /* MW 4 */
+ 8739 "00101000" // /* MW 3 */
+ 8740 "00010001" // /* MW 2 */
+ 8741 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8743 "00000101" // /* MW 3 */
+ 8744 "00100000" // /* MW 2 */
+ 8745 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8753 "00000000" // /* MW 15 */
+ 8754 "00000000" // /* MW 14 */
+ 8755 "01111000" // /* MW 13 */
+ 8756 "10100101" // /* MW 12 */
+ 8757 "00000001" // /* MW 11 */
+ 8758 "00000000" // /* MW 10 */
+ 8759 "00000000" // /* MW 9 */
+ 8760 "00000000" // /* MW 8 */
+ 8761 "01011011" // /* MW 7 */
+ 8762 "00000001" // /* MW 6 */
+ 8763 "00100000" // /* MW 5 */
+ 8764 "00000000" // /* MW 4 */
+ 8765 "11110000" // /* MW 3 */
+ 8766 "00101100" // /* MW 2 */
+ 8767 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+ 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8769 "00000000" // /* MW 15 */
+ 8770 "00000000" // /* MW 14 */
+ 8771 "01111000" // /* MW 13 */
+ 8772 "10100101" // /* MW 12 */
+ 8773 "00000001" // /* MW 11 */
+ 8774 "00101000" // /* MW 10 */
+ 8775 "00000000" // /* MW 9 */
+ 8776 "00000001" // /* MW 8 */
+ 8777 "01011011" // /* MW 7 */
+ 8778 "00000001" // /* MW 6 */
+ 8779 "00100000" // /* MW 5 */
+ 8780 "00000000" // /* MW 4 */
+ 8781 "11110000" // /* MW 3 */
+ 8782 "00101100" // /* MW 2 */
+ 8783 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+.src_ref 8 "superkernels.cpp" 516 47
+.src_ref 1 "io_buffer_main.h" 125 25
+ 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8785 "00010000" // /* MW 9 */
+ 8786 "01100110" // /* MW 8 */
+ 8787 "00110010" // /* MW 7 */
+ 8788 "11110011" // /* MW 6 */
+ 8789 "00000001" // /* MW 5 */
+ 8790 "00000000" // /* MW 4 */
+ 8791 "00100000" // /* MW 3 */
+ 8792 "01110011" // /* MW 2 */
+ 8793 "11111100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 47 first
+.src_ref 8 "superkernels.cpp" 522 6
+ 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8795 "00010000" // /* MW 9 */
+ 8796 "01101000" // /* MW 8 */
+ 8797 "00110010" // /* MW 7 */
+ 8798 "11110001" // /* MW 6 */
+ 8799 "00000001" // /* MW 5 */
+ 8800 "00000000" // /* MW 4 */
+ 8801 "11010000" // /* MW 3 */
+ 8802 "11010110" // /* MW 2 */
+ 8803 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8805 "00010000" // /* MW 9 */
+ 8806 "01100000" // /* MW 8 */
+ 8807 "00110010" // /* MW 7 */
+ 8808 "11110011" // /* MW 6 */
+ 8809 "00000001" // /* MW 5 */
+ 8810 "00000000" // /* MW 4 */
+ 8811 "11010000" // /* MW 3 */
+ 8812 "11000110" // /* MW 2 */
+ 8813 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+ 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "10010110" // /* MW 3 */
+ 8816 "00000110" // /* MW 2 */
+ 8817 "00000110" // /* MW 1 */
+ 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8819 "00000000" // /* MW 1 */
+ 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8821 "00000000" // /* MW 1 */
+ 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8823 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8825 "01110110" // /* MW 3 */
+ 8826 "00000110" // /* MW 2 */
+ 8827 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+ 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8829 "00001101" // /* MW 3 */
+ 8830 "01101011" // /* MW 2 */
+ 8831 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8833 "00000111" // /* MW 3 */
+ 8834 "01100001" // /* MW 2 */
+ 8835 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */
+ 8837 "00000001" // /* MW 5 */
+ 8838 "01000000" // /* MW 4 */
+ 8839 "00001000" // /* MW 3 */
+ 8840 "00010010" // /* MW 2 */
+ 8841 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+.delay_slot
+ 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8843 "00000111" // /* MW 3 */
+ 8844 "00101000" // /* MW 2 */
+ 8845 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.delay_slot
+ 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8847 "10010001" // /* MW 3 */
+ 8848 "00000110" // /* MW 2 */
+ 8849 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8851 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+.delay_slot
+ 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8853 "11010101" // /* MW 3 */
+ 8854 "01101001" // /* MW 2 */
+ 8855 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 12
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8857 "00100010" // /* MW 5 */
+ 8858 "01001000" // /* MW 4 */
+ 8859 "10110000" // /* MW 3 */
+ 8860 "10000011" // /* MW 2 */
+ 8861 "11110111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8863 "00100111" // /* MW 3 */
+ 8864 "01100001" // /* MW 2 */
+ 8865 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */
+ 8867 "00000001" // /* MW 5 */
+ 8868 "01000000" // /* MW 4 */
+ 8869 "11000000" // /* MW 3 */
+ 8870 "00010001" // /* MW 2 */
+ 8871 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8873 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8881 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8883 "11101000" // /* MW 3 */
+ 8884 "01100000" // /* MW 2 */
+ 8885 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */
+ 8887 "00000001" // /* MW 5 */
+ 8888 "01000000" // /* MW 4 */
+ 8889 "10101000" // /* MW 3 */
+ 8890 "00010001" // /* MW 2 */
+ 8891 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26
+.delay_slot
+ 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8893 "11000000" // /* MW 5 */
+ 8894 "11001001" // /* MW 4 */
+ 8895 "11001100" // /* MW 3 */
+ 8896 "00000111" // /* MW 2 */
+ 8897 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26 first
+.src_ref 8 "superkernels.cpp" 523 61
+ 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8907 "00010000" // /* MW 9 */
+ 8908 "00100100" // /* MW 8 */
+ 8909 "00110010" // /* MW 7 */
+ 8910 "11110011" // /* MW 6 */
+ 8911 "00000001" // /* MW 5 */
+ 8912 "00000000" // /* MW 4 */
+ 8913 "11010000" // /* MW 3 */
+ 8914 "11001010" // /* MW 2 */
+ 8915 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 61
+.src_ref 8 "superkernels.cpp" 524 44
+ 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8917 "00010000" // /* MW 9 */
+ 8918 "01101010" // /* MW 8 */
+ 8919 "00110010" // /* MW 7 */
+ 8920 "11110011" // /* MW 6 */
+ 8921 "00000001" // /* MW 5 */
+ 8922 "00000000" // /* MW 4 */
+ 8923 "11010000" // /* MW 3 */
+ 8924 "11000010" // /* MW 2 */
+ 8925 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+.src_ref 8 "superkernels.cpp" 524 44 first
+ 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8927 "00000010" // /* MW 5 */
+ 8928 "01100000" // /* MW 4 */
+ 8929 "11010000" // /* MW 3 */
+ 8930 "11000110" // /* MW 2 */
+ 8931 "11000000" // /* MW 1 */
+ 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8933 "00000000" // /* MW 1 */
+ 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8935 "00000000" // /* MW 1 */
+ 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8937 "00000000" // /* MW 1 */
+ 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8939 "00000000" // /* MW 1 */
+ 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8941 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 37 first
+ 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8943 "00001111" // /* MW 3 */
+ 8944 "10100101" // /* MW 2 */
+ 8945 "00010100" // /* MW 1 */
+ 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8947 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30 first
+.src_ref 8 "superkernels.cpp" 524 30 first
+ 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8949 "10000010" // /* MW 5 */
+ 8950 "00110010" // /* MW 4 */
+ 8951 "00111010" // /* MW 3 */
+ 8952 "11100100" // /* MW 2 */
+ 8953 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8955 "00011100" // /* MW 3 */
+ 8956 "00110111" // /* MW 2 */
+ 8957 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8959 "00000010" // /* MW 3 */
+ 8960 "11100111" // /* MW 2 */
+ 8961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 42
+ 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8963 "00011100" // /* MW 3 */
+ 8964 "10110111" // /* MW 2 */
+ 8965 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8967 "00110010" // /* MW 3 */
+ 8968 "00100011" // /* MW 2 */
+ 8969 "00010110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 65 first
+ 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8971 "00010001" // /* MW 3 */
+ 8972 "00100101" // /* MW 2 */
+ 8973 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 526 36 first
+ 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8975 "00001000" // /* MW 3 */
+ 8976 "01100001" // /* MW 2 */
+ 8977 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 8979 "00000001" // /* MW 5 */
+ 8980 "01000000" // /* MW 4 */
+ 8981 "01000000" // /* MW 3 */
+ 8982 "00010010" // /* MW 2 */
+ 8983 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32
+.delay_slot
+ 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8985 "00100000" // /* MW 5 */
+ 8986 "11001010" // /* MW 4 */
+ 8987 "11001100" // /* MW 3 */
+ 8988 "00000111" // /* MW 2 */
+ 8989 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32 first
+.delay_slot
+ 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8991 "01010001" // /* MW 3 */
+ 8992 "00000110" // /* MW 2 */
+ 8993 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8999 "00000000" // /* MW 1 */
+ 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9001 "00000000" // /* MW 5 */
+ 9002 "00000000" // /* MW 4 */
+ 9003 "11111000" // /* MW 3 */
+ 9004 "00010001" // /* MW 2 */
+ 9005 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9007 "00010000" // /* MW 9 */
+ 9008 "01101000" // /* MW 8 */
+ 9009 "10110010" // /* MW 7 */
+ 9010 "11110011" // /* MW 6 */
+ 9011 "00000001" // /* MW 5 */
+ 9012 "00000000" // /* MW 4 */
+ 9013 "00000000" // /* MW 3 */
+ 9014 "01001110" // /* MW 2 */
+ 9015 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9017 "00010000" // /* MW 9 */
+ 9018 "00100000" // /* MW 8 */
+ 9019 "00110010" // /* MW 7 */
+ 9020 "11110001" // /* MW 6 */
+ 9021 "00000001" // /* MW 5 */
+ 9022 "00000000" // /* MW 4 */
+ 9023 "00000000" // /* MW 3 */
+ 9024 "00101111" // /* MW 2 */
+ 9025 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9027 "00000001" // /* MW 3 */
+ 9028 "00011010" // /* MW 2 */
+ 9029 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9031 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9033 "00011100" // /* MW 7 */
+ 9034 "00000000" // /* MW 6 */
+ 9035 "00000000" // /* MW 5 */
+ 9036 "00000100" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9041 "00000000" // /* MW 5 */
+ 9042 "00000000" // /* MW 4 */
+ 9043 "11111000" // /* MW 3 */
+ 9044 "00010001" // /* MW 2 */
+ 9045 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9047 "00010000" // /* MW 9 */
+ 9048 "01101000" // /* MW 8 */
+ 9049 "10110010" // /* MW 7 */
+ 9050 "11110011" // /* MW 6 */
+ 9051 "00000001" // /* MW 5 */
+ 9052 "00000000" // /* MW 4 */
+ 9053 "00000000" // /* MW 3 */
+ 9054 "01001110" // /* MW 2 */
+ 9055 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9057 "00010000" // /* MW 9 */
+ 9058 "00100000" // /* MW 8 */
+ 9059 "00110010" // /* MW 7 */
+ 9060 "11110001" // /* MW 6 */
+ 9061 "00000001" // /* MW 5 */
+ 9062 "00000000" // /* MW 4 */
+ 9063 "00000000" // /* MW 3 */
+ 9064 "00101111" // /* MW 2 */
+ 9065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9067 "00000001" // /* MW 3 */
+ 9068 "00011010" // /* MW 2 */
+ 9069 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9073 "00000000" // /* MW 15 */
+ 9074 "00000000" // /* MW 14 */
+ 9075 "01111000" // /* MW 13 */
+ 9076 "10100101" // /* MW 12 */
+ 9077 "00000001" // /* MW 11 */
+ 9078 "00000000" // /* MW 10 */
+ 9079 "00000000" // /* MW 9 */
+ 9080 "00000000" // /* MW 8 */
+ 9081 "01011011" // /* MW 7 */
+ 9082 "00000001" // /* MW 6 */
+ 9083 "00100000" // /* MW 5 */
+ 9084 "00000000" // /* MW 4 */
+ 9085 "11110000" // /* MW 3 */
+ 9086 "00101100" // /* MW 2 */
+ 9087 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+.src_ref 8 "superkernels.cpp" 532 27
+.src_ref 8 "superkernels.cpp" 533 31
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+ 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9089 "00010000" // /* MW 9 */
+ 9090 "01110010" // /* MW 8 */
+ 9091 "00110010" // /* MW 7 */
+ 9092 "11110011" // /* MW 6 */
+ 9093 "00000001" // /* MW 5 */
+ 9094 "00000000" // /* MW 4 */
+ 9095 "00000000" // /* MW 3 */
+ 9096 "00001101" // /* MW 2 */
+ 9097 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 27 first
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 552 2
+ 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9099 "00010000" // /* MW 9 */
+ 9100 "00100000" // /* MW 8 */
+ 9101 "00110010" // /* MW 7 */
+ 9102 "11110001" // /* MW 6 */
+ 9103 "00000001" // /* MW 5 */
+ 9104 "00000000" // /* MW 4 */
+ 9105 "11010000" // /* MW 3 */
+ 9106 "11001010" // /* MW 2 */
+ 9107 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 533 46
+ 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9109 "00010000" // /* MW 9 */
+ 9110 "01101100" // /* MW 8 */
+ 9111 "00110010" // /* MW 7 */
+ 9112 "11110011" // /* MW 6 */
+ 9113 "00000001" // /* MW 5 */
+ 9114 "00000000" // /* MW 4 */
+ 9115 "11010000" // /* MW 3 */
+ 9116 "11000010" // /* MW 2 */
+ 9117 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 46 first
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9119 "00001010" // /* MW 5 */
+ 9120 "00111100" // /* MW 4 */
+ 9121 "11010000" // /* MW 3 */
+ 9122 "11000110" // /* MW 2 */
+ 9123 "11000000" // /* MW 1 */
+ 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9125 "00000000" // /* MW 1 */
+ 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9127 "00000000" // /* MW 1 */
+ 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9129 "00000000" // /* MW 1 */
+ 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9131 "00000000" // /* MW 1 */
+ 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9133 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 39 first
+ 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9135 "00001111" // /* MW 3 */
+ 9136 "10100101" // /* MW 2 */
+ 9137 "00010100" // /* MW 1 */
+ 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9139 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31 first
+.src_ref 8 "superkernels.cpp" 533 31 first
+ 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9141 "10000010" // /* MW 5 */
+ 9142 "00110010" // /* MW 4 */
+ 9143 "00111010" // /* MW 3 */
+ 9144 "11100100" // /* MW 2 */
+ 9145 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9147 "00011100" // /* MW 3 */
+ 9148 "00110111" // /* MW 2 */
+ 9149 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9151 "00000010" // /* MW 3 */
+ 9152 "11100111" // /* MW 2 */
+ 9153 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 44
+ 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "00011100" // /* MW 3 */
+ 9156 "10110111" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "00110010" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 67 first
+ 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00010001" // /* MW 3 */
+ 9164 "00100101" // /* MW 2 */
+ 9165 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 535 37 first
+ 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9167 "00001000" // /* MW 3 */
+ 9168 "01100001" // /* MW 2 */
+ 9169 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 9171 "00000001" // /* MW 5 */
+ 9172 "01000000" // /* MW 4 */
+ 9173 "01000000" // /* MW 3 */
+ 9174 "00010010" // /* MW 2 */
+ 9175 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33
+.delay_slot
+ 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9177 "00110000" // /* MW 5 */
+ 9178 "11001010" // /* MW 4 */
+ 9179 "11001100" // /* MW 3 */
+ 9180 "00000111" // /* MW 2 */
+ 9181 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33 first
+.delay_slot
+ 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9183 "01010001" // /* MW 3 */
+ 9184 "00000110" // /* MW 2 */
+ 9185 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9189 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9191 "00010000" // /* MW 9 */
+ 9192 "01101000" // /* MW 8 */
+ 9193 "10110010" // /* MW 7 */
+ 9194 "11110011" // /* MW 6 */
+ 9195 "00000001" // /* MW 5 */
+ 9196 "00000000" // /* MW 4 */
+ 9197 "11110000" // /* MW 3 */
+ 9198 "00101100" // /* MW 2 */
+ 9199 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 9201 "00100001" // /* MW 9 */
+ 9202 "00000000" // /* MW 8 */
+ 9203 "00000000" // /* MW 7 */
+ 9204 "10011000" // /* MW 6 */
+ 9205 "00000100" // /* MW 5 */
+ 9206 "00000000" // /* MW 4 */
+ 9207 "01100000" // /* MW 3 */
+ 9208 "10000001" // /* MW 2 */
+ 9209 "11010001" // /* MW 1 */
+.delay_slot
+ 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9211 "10010001" // /* MW 3 */
+ 9212 "11100101" // /* MW 2 */
+ 9213 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9221 "10000001" // /* MW 11 */
+ 9222 "10101101" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "00000000" // /* MW 8 */
+ 9225 "00000000" // /* MW 7 */
+ 9226 "00000000" // /* MW 6 */
+ 9227 "00100000" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+.src_ref 8 "superkernels.cpp" 541 26
+ 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10001000" // /* MW 5 */
+ 9234 "11001001" // /* MW 4 */
+ 9235 "11001100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 26 first
+.src_ref 8 "superkernels.cpp" 541 61
+ 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "00100010" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110011" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11001110" // /* MW 2 */
+ 9247 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 61
+.src_ref 8 "superkernels.cpp" 542 44
+ 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "01101110" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110011" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000010" // /* MW 2 */
+ 9257 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 44 first
+ 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9259 "01010110" // /* MW 3 */
+ 9260 "00000110" // /* MW 2 */
+ 9261 "00000110" // /* MW 1 */
+ 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9263 "00000000" // /* MW 1 */
+ 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9265 "00000000" // /* MW 1 */
+ 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9267 "00000000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 37 first
+ 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9273 "00001111" // /* MW 3 */
+ 9274 "11100111" // /* MW 2 */
+ 9275 "00010100" // /* MW 1 */
+ 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30 first
+.src_ref 8 "superkernels.cpp" 542 30 first
+ 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "10000010" // /* MW 5 */
+ 9280 "10110011" // /* MW 4 */
+ 9281 "00111010" // /* MW 3 */
+ 9282 "00100110" // /* MW 2 */
+ 9283 "10010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9285 "00101100" // /* MW 3 */
+ 9286 "01110111" // /* MW 2 */
+ 9287 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9289 "00000010" // /* MW 3 */
+ 9290 "00101001" // /* MW 2 */
+ 9291 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+.src_ref 8 "superkernels.cpp" 542 42
+ 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9293 "00000001" // /* MW 5 */
+ 9294 "10100000" // /* MW 4 */
+ 9295 "10011000" // /* MW 3 */
+ 9296 "11100101" // /* MW 2 */
+ 9297 "10011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9299 "01000010" // /* MW 3 */
+ 9300 "01100011" // /* MW 2 */
+ 9301 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 69 first
+ 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9303 "00010001" // /* MW 3 */
+ 9304 "00100101" // /* MW 2 */
+ 9305 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 544 38 first
+ 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9307 "00000111" // /* MW 3 */
+ 9308 "01100001" // /* MW 2 */
+ 9309 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */
+ 9311 "00000001" // /* MW 5 */
+ 9312 "01000000" // /* MW 4 */
+ 9313 "11100000" // /* MW 3 */
+ 9314 "00010011" // /* MW 2 */
+ 9315 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34
+.delay_slot
+ 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9317 "01000000" // /* MW 5 */
+ 9318 "11001010" // /* MW 4 */
+ 9319 "11001100" // /* MW 3 */
+ 9320 "00000111" // /* MW 2 */
+ 9321 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34 first
+.delay_slot
+ 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9323 "01010001" // /* MW 3 */
+ 9324 "00000110" // /* MW 2 */
+ 9325 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9327 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9329 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9331 "00011100" // /* MW 13 */
+ 9332 "00000000" // /* MW 12 */
+ 9333 "00000000" // /* MW 11 */
+ 9334 "01010111" // /* MW 10 */
+ 9335 "00011010" // /* MW 9 */
+ 9336 "01000000" // /* MW 8 */
+ 9337 "00000000" // /* MW 7 */
+ 9338 "00000000" // /* MW 6 */
+ 9339 "10110110" // /* MW 5 */
+ 9340 "00000010" // /* MW 4 */
+ 9341 "11110000" // /* MW 3 */
+ 9342 "00101100" // /* MW 2 */
+ 9343 "00000000" // /* MW 1 */
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9345 "01011000" // /* MW 11 */
+ 9346 "00000001" // /* MW 10 */
+ 9347 "11101000" // /* MW 9 */
+ 9348 "01001001" // /* MW 8 */
+ 9349 "11100000" // /* MW 7 */
+ 9350 "00000000" // /* MW 6 */
+ 9351 "00001011" // /* MW 5 */
+ 9352 "10001100" // /* MW 4 */
+ 9353 "00100110" // /* MW 3 */
+ 9354 "10000011" // /* MW 2 */
+ 9355 "11110111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9357 "10011001" // /* MW 3 */
+ 9358 "10111100" // /* MW 2 */
+ 9359 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9361 "10010001" // /* MW 3 */
+ 9362 "11100101" // /* MW 2 */
+ 9363 "00000111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11 first
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */
+ 9365 "00000001" // /* MW 5 */
+ 9366 "00000000" // /* MW 4 */
+ 9367 "00101000" // /* MW 3 */
+ 9368 "00001000" // /* MW 2 */
+ 9369 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9371 "11000000" // /* MW 3 */
+ 9372 "01100000" // /* MW 2 */
+ 9373 "00011111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9375 "00000001" // /* MW 3 */
+ 9376 "00011010" // /* MW 2 */
+ 9377 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11
+.delay_slot
+ 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9383 "00010000" // /* MW 9 */
+ 9384 "10000000" // /* MW 8 */
+ 9385 "00110010" // /* MW 7 */
+ 9386 "11110001" // /* MW 6 */
+ 9387 "00000001" // /* MW 5 */
+ 9388 "00000000" // /* MW 4 */
+ 9389 "11110000" // /* MW 3 */
+ 9390 "00101100" // /* MW 2 */
+ 9391 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 552 2
+.return_address
+ 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9393 "00010001" // /* MW 9 */
+ 9394 "00100000" // /* MW 8 */
+ 9395 "00110010" // /* MW 7 */
+ 9396 "11110001" // /* MW 6 */
+ 9397 "00000001" // /* MW 5 */
+ 9398 "00000000" // /* MW 4 */
+ 9399 "01100000" // /* MW 3 */
+ 9400 "10010001" // /* MW 2 */
+ 9401 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+ 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9403 "10100000" // /* MW 5 */
+ 9404 "11001001" // /* MW 4 */
+ 9405 "11001110" // /* MW 3 */
+ 9406 "00000111" // /* MW 2 */
+ 9407 "00000000" // /* MW 1 */
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9409 "10011110" // /* MW 3 */
+ 9410 "01011100" // /* MW 2 */
+ 9411 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2 first
+.no_stack_arguments
+ 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */
+ 9413 "00000001" // /* MW 5 */
+ 9414 "00000000" // /* MW 4 */
+ 9415 "01111000" // /* MW 3 */
+ 9416 "00001001" // /* MW 2 */
+ 9417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9421 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9427 "00011100" // /* MW 13 */
+ 9428 "00000000" // /* MW 12 */
+ 9429 "00000000" // /* MW 11 */
+ 9430 "01010111" // /* MW 10 */
+ 9431 "00011010" // /* MW 9 */
+ 9432 "01000000" // /* MW 8 */
+ 9433 "00000000" // /* MW 7 */
+ 9434 "00000000" // /* MW 6 */
+ 9435 "10110110" // /* MW 5 */
+ 9436 "00000010" // /* MW 4 */
+ 9437 "11110000" // /* MW 3 */
+ 9438 "00101100" // /* MW 2 */
+ 9439 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7 first
+.return_address
+ 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9441 "00010110" // /* MW 3 */
+ 9442 "00000110" // /* MW 2 */
+ 9443 "00000111" // /* MW 1 */
+ 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9445 "00000000" // /* MW 1 */
+ 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9447 "00000000" // /* MW 1 */
+ 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9449 "00000000" // /* MW 1 */
+ 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9451 "00000000" // /* MW 1 */
+ 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9453 "00000000" // /* MW 1 */
+ 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+ 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9457 "00001000" // /* MW 3 */
+ 9458 "11100011" // /* MW 2 */
+ 9459 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 25
+ 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "01000000" // /* MW 4 */
+ 9463 "11100000" // /* MW 3 */
+ 9464 "00010010" // /* MW 2 */
+ 9465 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 555 15
+ 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9477 "10001000" // /* MW 5 */
+ 9478 "11001001" // /* MW 4 */
+ 9479 "11001110" // /* MW 3 */
+ 9480 "00000111" // /* MW 2 */
+ 9481 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 67
+ 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9483 "00010000" // /* MW 9 */
+ 9484 "00110000" // /* MW 8 */
+ 9485 "00110010" // /* MW 7 */
+ 9486 "11110001" // /* MW 6 */
+ 9487 "00000001" // /* MW 5 */
+ 9488 "00000000" // /* MW 4 */
+ 9489 "11010000" // /* MW 3 */
+ 9490 "11000010" // /* MW 2 */
+ 9491 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 67
+ 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9493 "00111010" // /* MW 3 */
+ 9494 "00000100" // /* MW 2 */
+ 9495 "00000010" // /* MW 1 */
+ 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9497 "00000000" // /* MW 1 */
+ 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9499 "00000000" // /* MW 1 */
+ 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9501 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.no_stack_arguments
+ 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9503 "00000001" // /* MW 5 */
+ 9504 "00000000" // /* MW 4 */
+ 9505 "11111000" // /* MW 3 */
+ 9506 "00010011" // /* MW 2 */
+ 9507 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.delay_slot
+ 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00000111" // /* MW 3 */
+ 9512 "00100000" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9515 "10110101" // /* MW 5 */
+ 9516 "01101101" // /* MW 4 */
+ 9517 "00111000" // /* MW 3 */
+ 9518 "11000010" // /* MW 2 */
+ 9519 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9521 "01000001" // /* MW 5 */
+ 9522 "10111011" // /* MW 4 */
+ 9523 "00110111" // /* MW 3 */
+ 9524 "01100000" // /* MW 2 */
+ 9525 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9527 "00010010" // /* MW 9 */
+ 9528 "00000001" // /* MW 8 */
+ 9529 "00000100" // /* MW 7 */
+ 9530 "00000000" // /* MW 6 */
+ 9531 "01011011" // /* MW 5 */
+ 9532 "00000001" // /* MW 4 */
+ 9533 "11110000" // /* MW 3 */
+ 9534 "00101100" // /* MW 2 */
+ 9535 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9537 "01111000" // /* MW 9 */
+ 9538 "11010000" // /* MW 8 */
+ 9539 "01101011" // /* MW 7 */
+ 9540 "10001111" // /* MW 6 */
+ 9541 "00000001" // /* MW 5 */
+ 9542 "00011011" // /* MW 4 */
+ 9543 "00100000" // /* MW 3 */
+ 9544 "10100011" // /* MW 2 */
+ 9545 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+ 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9547 "00000010" // /* MW 3 */
+ 9548 "11100001" // /* MW 2 */
+ 9549 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 6
+.src_ref 8 "superkernels.cpp" 554 78
+ 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */
+ 9551 "00000001" // /* MW 5 */
+ 9552 "01000000" // /* MW 4 */
+ 9553 "11010000" // /* MW 3 */
+ 9554 "00010010" // /* MW 2 */
+ 9555 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00000101" // /* MW 3 */
+ 9558 "00011110" // /* MW 2 */
+ 9559 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9567 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 555 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9569 "01100011" // /* MW 5 */
+ 9570 "00001011" // /* MW 4 */
+ 9571 "11011110" // /* MW 3 */
+ 9572 "11000010" // /* MW 2 */
+ 9573 "01001010" // /* MW 1 */
+ 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9575 "00000000" // /* MW 1 */
+ 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9577 "00000000" // /* MW 1 */
+ 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9579 "00000000" // /* MW 1 */
+ 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9581 "00000000" // /* MW 1 */
+ 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9583 "00000000" // /* MW 1 */
+ 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9585 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9587 "11111000" // /* MW 3 */
+ 9588 "00010000" // /* MW 2 */
+ 9589 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 7
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9591 "00010000" // /* MW 9 */
+ 9592 "01101000" // /* MW 8 */
+ 9593 "10110010" // /* MW 7 */
+ 9594 "11110011" // /* MW 6 */
+ 9595 "00000001" // /* MW 5 */
+ 9596 "00000000" // /* MW 4 */
+ 9597 "11010000" // /* MW 3 */
+ 9598 "11000010" // /* MW 2 */
+ 9599 "11011100" // /* MW 1 */
+ 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9601 "00000000" // /* MW 1 */
+ 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9603 "00000000" // /* MW 1 */
+ 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */
+ 9605 "00000000" // /* MW 5 */
+ 9606 "00000000" // /* MW 4 */
+ 9607 "11011000" // /* MW 3 */
+ 9608 "00010010" // /* MW 2 */
+ 9609 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9617 "00000001" // /* MW 3 */
+ 9618 "11100001" // /* MW 2 */
+ 9619 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.delay_slot
+ 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9621 "11000001" // /* MW 11 */
+ 9622 "00001000" // /* MW 10 */
+ 9623 "01110011" // /* MW 9 */
+ 9624 "00000011" // /* MW 8 */
+ 9625 "00000000" // /* MW 7 */
+ 9626 "00000000" // /* MW 6 */
+ 9627 "00100000" // /* MW 5 */
+ 9628 "00000000" // /* MW 4 */
+ 9629 "11110000" // /* MW 3 */
+ 9630 "00101100" // /* MW 2 */
+ 9631 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+.src_ref 8 "superkernels.cpp" 558 7
+ 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9633 "00000000" // /* MW 15 */
+ 9634 "00000000" // /* MW 14 */
+ 9635 "00010000" // /* MW 13 */
+ 9636 "01101000" // /* MW 12 */
+ 9637 "10110010" // /* MW 11 */
+ 9638 "11110011" // /* MW 10 */
+ 9639 "00000001" // /* MW 9 */
+ 9640 "00000000" // /* MW 8 */
+ 9641 "01011011" // /* MW 7 */
+ 9642 "00000001" // /* MW 6 */
+ 9643 "00100000" // /* MW 5 */
+ 9644 "00000000" // /* MW 4 */
+ 9645 "11110000" // /* MW 3 */
+ 9646 "00101100" // /* MW 2 */
+ 9647 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+.src_ref 8 "superkernels.cpp" 558 7 first
+ 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9649 "00000000" // /* MW 15 */
+ 9650 "00000000" // /* MW 14 */
+ 9651 "01111000" // /* MW 13 */
+ 9652 "10100101" // /* MW 12 */
+ 9653 "00000001" // /* MW 11 */
+ 9654 "00000000" // /* MW 10 */
+ 9655 "00000000" // /* MW 9 */
+ 9656 "00000000" // /* MW 8 */
+ 9657 "01011011" // /* MW 7 */
+ 9658 "00000001" // /* MW 6 */
+ 9659 "00100000" // /* MW 5 */
+ 9660 "00000000" // /* MW 4 */
+ 9661 "11010000" // /* MW 3 */
+ 9662 "11000010" // /* MW 2 */
+ 9663 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+.src_ref 8 "superkernels.cpp" 558 43
+ 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00000001" // /* MW 3 */
+ 9666 "00100010" // /* MW 2 */
+ 9667 "00010000" // /* MW 1 */
+ 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9669 "00000000" // /* MW 1 */
+ 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9671 "00000000" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 19
+ 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9679 "00001000" // /* MW 3 */
+ 9680 "10100001" // /* MW 2 */
+ 9681 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 25
+ 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */
+ 9683 "00000001" // /* MW 5 */
+ 9684 "01000000" // /* MW 4 */
+ 9685 "01001000" // /* MW 3 */
+ 9686 "00010011" // /* MW 2 */
+ 9687 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 559 15
+.delay_slot
+ 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9689 "11000000" // /* MW 5 */
+ 9690 "11001001" // /* MW 4 */
+ 9691 "11001110" // /* MW 3 */
+ 9692 "00000111" // /* MW 2 */
+ 9693 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+.delay_slot
+ 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9695 "11000000" // /* MW 5 */
+ 9696 "11001000" // /* MW 4 */
+ 9697 "11000100" // /* MW 3 */
+ 9698 "00000111" // /* MW 2 */
+ 9699 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9701 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9703 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9705 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+ 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00010110" // /* MW 3 */
+ 9708 "00000110" // /* MW 2 */
+ 9709 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+ 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "00111010" // /* MW 3 */
+ 9712 "00000100" // /* MW 2 */
+ 9713 "00000010" // /* MW 1 */
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9715 "00000000" // /* MW 1 */
+ 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9717 "00000000" // /* MW 1 */
+ 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9719 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.no_stack_arguments
+ 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9721 "00000001" // /* MW 5 */
+ 9722 "00000000" // /* MW 4 */
+ 9723 "11111000" // /* MW 3 */
+ 9724 "00010011" // /* MW 2 */
+ 9725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9727 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.delay_slot
+ 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9729 "00000111" // /* MW 3 */
+ 9730 "00100000" // /* MW 2 */
+ 9731 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9733 "00110101" // /* MW 5 */
+ 9734 "01101110" // /* MW 4 */
+ 9735 "00111000" // /* MW 3 */
+ 9736 "11000010" // /* MW 2 */
+ 9737 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9739 "01000001" // /* MW 5 */
+ 9740 "00111011" // /* MW 4 */
+ 9741 "00110111" // /* MW 3 */
+ 9742 "01100000" // /* MW 2 */
+ 9743 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9745 "00000000" // /* MW 15 */
+ 9746 "00000000" // /* MW 14 */
+ 9747 "01111000" // /* MW 13 */
+ 9748 "10100101" // /* MW 12 */
+ 9749 "00000001" // /* MW 11 */
+ 9750 "10010000" // /* MW 10 */
+ 9751 "00001000" // /* MW 9 */
+ 9752 "00100000" // /* MW 8 */
+ 9753 "01011011" // /* MW 7 */
+ 9754 "00000001" // /* MW 6 */
+ 9755 "00100000" // /* MW 5 */
+ 9756 "00000000" // /* MW 4 */
+ 9757 "11110000" // /* MW 3 */
+ 9758 "00101100" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9761 "01111000" // /* MW 9 */
+ 9762 "10010000" // /* MW 8 */
+ 9763 "01101011" // /* MW 7 */
+ 9764 "10001111" // /* MW 6 */
+ 9765 "00000001" // /* MW 5 */
+ 9766 "00011011" // /* MW 4 */
+ 9767 "00100000" // /* MW 3 */
+ 9768 "10010011" // /* MW 2 */
+ 9769 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+ 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9771 "00000010" // /* MW 3 */
+ 9772 "11100001" // /* MW 2 */
+ 9773 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 6
+.src_ref 8 "superkernels.cpp" 558 78
+ 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */
+ 9775 "00000001" // /* MW 5 */
+ 9776 "01000000" // /* MW 4 */
+ 9777 "00111000" // /* MW 3 */
+ 9778 "00010011" // /* MW 2 */
+ 9779 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 7
+.delay_slot
+ 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9781 "10100000" // /* MW 5 */
+ 9782 "11001001" // /* MW 4 */
+ 9783 "11000100" // /* MW 3 */
+ 9784 "00000111" // /* MW 2 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9789 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9791 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9793 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 559 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9795 "01100011" // /* MW 5 */
+ 9796 "00001011" // /* MW 4 */
+ 9797 "11011110" // /* MW 3 */
+ 9798 "11000010" // /* MW 2 */
+ 9799 "00101010" // /* MW 1 */
+ 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9801 "00000000" // /* MW 1 */
+ 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9803 "00000000" // /* MW 1 */
+ 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9805 "00000000" // /* MW 1 */
+ 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9807 "00000000" // /* MW 1 */
+ 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9809 "00000000" // /* MW 1 */
+ 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "11111000" // /* MW 3 */
+ 9814 "00010000" // /* MW 2 */
+ 9815 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "00010110" // /* MW 3 */
+ 9818 "11100110" // /* MW 2 */
+ 9819 "00000110" // /* MW 1 */
+ 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9821 "00000000" // /* MW 1 */
+ 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9823 "00000000" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00000001" // /* MW 3 */
+ 9834 "11100001" // /* MW 2 */
+ 9835 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9837 "00010001" // /* MW 3 */
+ 9838 "11100110" // /* MW 2 */
+ 9839 "00001110" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */
+ 9841 "00000000" // /* MW 5 */
+ 9842 "00000000" // /* MW 4 */
+ 9843 "01010000" // /* MW 3 */
+ 9844 "00010011" // /* MW 2 */
+ 9845 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9847 "11000000" // /* MW 3 */
+ 9848 "01100010" // /* MW 2 */
+ 9849 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9857 "00000000" // /* MW 15 */
+ 9858 "00000000" // /* MW 14 */
+ 9859 "01111000" // /* MW 13 */
+ 9860 "10100101" // /* MW 12 */
+ 9861 "00000001" // /* MW 11 */
+ 9862 "00000000" // /* MW 10 */
+ 9863 "00000000" // /* MW 9 */
+ 9864 "00000000" // /* MW 8 */
+ 9865 "01011011" // /* MW 7 */
+ 9866 "00000001" // /* MW 6 */
+ 9867 "00100000" // /* MW 5 */
+ 9868 "00000000" // /* MW 4 */
+ 9869 "11110000" // /* MW 3 */
+ 9870 "00101100" // /* MW 2 */
+ 9871 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+.src_ref 8 "superkernels.cpp" 562 7
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9873 "00000000" // /* MW 15 */
+ 9874 "00000000" // /* MW 14 */
+ 9875 "00010000" // /* MW 13 */
+ 9876 "01101000" // /* MW 12 */
+ 9877 "00110010" // /* MW 11 */
+ 9878 "11110001" // /* MW 10 */
+ 9879 "00000001" // /* MW 9 */
+ 9880 "00000000" // /* MW 8 */
+ 9881 "01011011" // /* MW 7 */
+ 9882 "00000001" // /* MW 6 */
+ 9883 "00100000" // /* MW 5 */
+ 9884 "00000000" // /* MW 4 */
+ 9885 "00100000" // /* MW 3 */
+ 9886 "11110011" // /* MW 2 */
+ 9887 "11111011" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+.src_ref 8 "superkernels.cpp" 562 7 first
+.src_ref 8 "superkernels.cpp" 562 19
+ 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00100010" // /* MW 5 */
+ 9890 "01000100" // /* MW 4 */
+ 9891 "11010000" // /* MW 3 */
+ 9892 "11000010" // /* MW 2 */
+ 9893 "01000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 19
+ 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9907 "00001000" // /* MW 3 */
+ 9908 "01100001" // /* MW 2 */
+ 9909 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 25
+ 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9911 "00000001" // /* MW 5 */
+ 9912 "01000000" // /* MW 4 */
+ 9913 "10101000" // /* MW 3 */
+ 9914 "00010011" // /* MW 2 */
+ 9915 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11001000" // /* MW 5 */
+ 9918 "11001001" // /* MW 4 */
+ 9919 "11000100" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9929 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 68
+ 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9931 "00010000" // /* MW 9 */
+ 9932 "00110000" // /* MW 8 */
+ 9933 "10110010" // /* MW 7 */
+ 9934 "11110000" // /* MW 6 */
+ 9935 "00000001" // /* MW 5 */
+ 9936 "00000000" // /* MW 4 */
+ 9937 "11010000" // /* MW 3 */
+ 9938 "11000010" // /* MW 2 */
+ 9939 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 68
+ 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9941 "00111010" // /* MW 3 */
+ 9942 "00000100" // /* MW 2 */
+ 9943 "00000001" // /* MW 1 */
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+ 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9947 "00000000" // /* MW 1 */
+ 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9949 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.no_stack_arguments
+ 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9951 "00000001" // /* MW 5 */
+ 9952 "00000000" // /* MW 4 */
+ 9953 "11111000" // /* MW 3 */
+ 9954 "00010011" // /* MW 2 */
+ 9955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9959 "00000111" // /* MW 3 */
+ 9960 "00100000" // /* MW 2 */
+ 9961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9963 "10110101" // /* MW 5 */
+ 9964 "01101101" // /* MW 4 */
+ 9965 "00111000" // /* MW 3 */
+ 9966 "11000010" // /* MW 2 */
+ 9967 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9969 "01000001" // /* MW 5 */
+ 9970 "00111011" // /* MW 4 */
+ 9971 "00110111" // /* MW 3 */
+ 9972 "01100000" // /* MW 2 */
+ 9973 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9975 "00010010" // /* MW 9 */
+ 9976 "00000001" // /* MW 8 */
+ 9977 "00000100" // /* MW 7 */
+ 9978 "00000000" // /* MW 6 */
+ 9979 "01011011" // /* MW 5 */
+ 9980 "00000001" // /* MW 4 */
+ 9981 "11110000" // /* MW 3 */
+ 9982 "00101100" // /* MW 2 */
+ 9983 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.return_address
+ 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9985 "01000001" // /* MW 5 */
+ 9986 "10101110" // /* MW 4 */
+ 9987 "00111101" // /* MW 3 */
+ 9988 "00000110" // /* MW 2 */
+ 9989 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+ 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9991 "00000010" // /* MW 3 */
+ 9992 "11100001" // /* MW 2 */
+ 9993 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 6
+.src_ref 8 "superkernels.cpp" 562 79
+ 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9995 "00000001" // /* MW 5 */
+ 9996 "01000000" // /* MW 4 */
+ 9997 "10101000" // /* MW 3 */
+ 9998 "00010011" // /* MW 2 */
+ 9999 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16
+.delay_slot
+ 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10001 "11001000" // /* MW 5 */
+ 10002 "11001001" // /* MW 4 */
+ 10003 "11000100" // /* MW 3 */
+ 10004 "00000111" // /* MW 2 */
+ 10005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10013 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "01100011" // /* MW 5 */
+ 10016 "00001011" // /* MW 4 */
+ 10017 "11010100" // /* MW 3 */
+ 10018 "11000010" // /* MW 2 */
+ 10019 "11101010" // /* MW 1 */
+ 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10021 "00000000" // /* MW 1 */
+ 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10023 "00000000" // /* MW 1 */
+ 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10025 "00000000" // /* MW 1 */
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10033 "11111000" // /* MW 3 */
+ 10034 "00010000" // /* MW 2 */
+ 10035 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10037 "00010110" // /* MW 3 */
+ 10038 "11100110" // /* MW 2 */
+ 10039 "00000110" // /* MW 1 */
+ 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10041 "00000000" // /* MW 1 */
+ 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10043 "00000000" // /* MW 1 */
+ 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10045 "00000000" // /* MW 1 */
+ 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10047 "00000000" // /* MW 1 */
+ 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10049 "00000000" // /* MW 1 */
+ 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10053 "00000001" // /* MW 3 */
+ 10054 "11100001" // /* MW 2 */
+ 10055 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10057 "01110000" // /* MW 7 */
+ 10058 "10100101" // /* MW 6 */
+ 10059 "00000001" // /* MW 5 */
+ 10060 "00000000" // /* MW 4 */
+ 10061 "00110000" // /* MW 3 */
+ 10062 "11000010" // /* MW 2 */
+ 10063 "11011100" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+.src_ref 8 "superkernels.cpp" 566 6
+.src_ref 8 "superkernels.cpp" 567 14
+ 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10065 "10000000" // /* MW 5 */
+ 10066 "11001001" // /* MW 4 */
+ 10067 "11001100" // /* MW 3 */
+ 10068 "00000111" // /* MW 2 */
+ 10069 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6 first
+.src_ref 8 "superkernels.cpp" 566 19
+ 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10071 "00010000" // /* MW 9 */
+ 10072 "01110100" // /* MW 8 */
+ 10073 "00110010" // /* MW 7 */
+ 10074 "11110001" // /* MW 6 */
+ 10075 "00000001" // /* MW 5 */
+ 10076 "00000000" // /* MW 4 */
+ 10077 "11010000" // /* MW 3 */
+ 10078 "11000010" // /* MW 2 */
+ 10079 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 19
+ 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10081 "00110110" // /* MW 3 */
+ 10082 "00000110" // /* MW 2 */
+ 10083 "00000010" // /* MW 1 */
+ 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10085 "00000000" // /* MW 1 */
+ 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10087 "00000000" // /* MW 1 */
+ 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10089 "00000000" // /* MW 1 */
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+ 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10093 "00000000" // /* MW 1 */
+ 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10095 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 16
+ 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10097 "00001000" // /* MW 3 */
+ 10098 "01100001" // /* MW 2 */
+ 10099 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6
+ 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */
+ 10101 "00000001" // /* MW 5 */
+ 10102 "01000000" // /* MW 4 */
+ 10103 "11001000" // /* MW 3 */
+ 10104 "00010011" // /* MW 2 */
+ 10105 "10000000" // /* MW 1 */
+.delay_slot
+ 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10107 "10011001" // /* MW 3 */
+ 10108 "11101111" // /* MW 2 */
+ 10109 "00000111" // /* MW 1 */
+.delay_slot
+ 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10111 "11110001" // /* MW 3 */
+ 10112 "11110001" // /* MW 2 */
+ 10113 "00000111" // /* MW 1 */
+.delay_slot
+ 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10115 "11010001" // /* MW 3 */
+ 10116 "11110101" // /* MW 2 */
+ 10117 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 567 14 first
+ 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100011" // /* MW 5 */
+ 10124 "00001011" // /* MW 4 */
+ 10125 "11111100" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10129 "01000001" // /* MW 5 */
+ 10130 "11101011" // /* MW 4 */
+ 10131 "00101110" // /* MW 3 */
+ 10132 "00101110" // /* MW 2 */
+ 10133 "11111111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10135 "10010001" // /* MW 3 */
+ 10136 "11111101" // /* MW 2 */
+ 10137 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10139 "10110001" // /* MW 3 */
+ 10140 "11101001" // /* MW 2 */
+ 10141 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10143 "00000000" // /* MW 3 */
+ 10144 "00101000" // /* MW 2 */
+ 10145 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10147 "00100000" // /* MW 3 */
+ 10148 "01100110" // /* MW 2 */
+ 10149 "00011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569
+.delay_slot
+ 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10151 "00000001" // /* MW 5 */
+ 10152 "00000000" // /* MW 4 */
+ 10153 "00000000" // /* MW 3 */
+ 10154 "11110000" // /* MW 2 */
+ 10155 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10161 "00000000" // /* MW 15 */
+ 10162 "00000000" // /* MW 14 */
+ 10163 "01111000" // /* MW 13 */
+ 10164 "10100101" // /* MW 12 */
+ 10165 "00000001" // /* MW 11 */
+ 10166 "00000000" // /* MW 10 */
+ 10167 "00000000" // /* MW 9 */
+ 10168 "00000000" // /* MW 8 */
+ 10169 "01011011" // /* MW 7 */
+ 10170 "00000001" // /* MW 6 */
+ 10171 "00100000" // /* MW 5 */
+ 10172 "00000000" // /* MW 4 */
+ 10173 "11110000" // /* MW 3 */
+ 10174 "00101100" // /* MW 2 */
+ 10175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 10177 "00100000" // /* MW 11 */
+ 10178 "00000000" // /* MW 10 */
+ 10179 "00000000" // /* MW 9 */
+ 10180 "10011000" // /* MW 8 */
+ 10181 "00000100" // /* MW 7 */
+ 10182 "00000000" // /* MW 6 */
+ 10183 "00001011" // /* MW 5 */
+ 10184 "10001100" // /* MW 4 */
+ 10185 "00000110" // /* MW 3 */
+ 10186 "00001101" // /* MW 2 */
+ 10187 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10189 "00001001" // /* MW 5 */
+ 10190 "00100000" // /* MW 4 */
+ 10191 "10100111" // /* MW 3 */
+ 10192 "11000000" // /* MW 2 */
+ 10193 "00000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+ 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10195 "10000000" // /* MW 5 */
+ 10196 "11001000" // /* MW 4 */
+ 10197 "11000100" // /* MW 3 */
+ 10198 "00000111" // /* MW 2 */
+ 10199 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10201 "10100000" // /* MW 5 */
+ 10202 "11001001" // /* MW 4 */
+ 10203 "11001110" // /* MW 3 */
+ 10204 "00000111" // /* MW 2 */
+ 10205 "00000000" // /* MW 1 */
+.delay_slot
+ 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10207 "10010001" // /* MW 3 */
+ 10208 "11100101" // /* MW 2 */
+ 10209 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 115 4 first
+.function_start
+ 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10225 "01000001" // /* MW 5 */
+ 10226 "10100000" // /* MW 4 */
+ 10227 "00101111" // /* MW 3 */
+ 10228 "11000000" // /* MW 2 */
+ 10229 "00000000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10231 "00011100" // /* MW 3 */
+ 10232 "11000110" // /* MW 2 */
+ 10233 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10235 "00011100" // /* MW 3 */
+ 10236 "11000110" // /* MW 2 */
+ 10237 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10239 "00011100" // /* MW 3 */
+ 10240 "11000110" // /* MW 2 */
+ 10241 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10243 "00011100" // /* MW 3 */
+ 10244 "11000110" // /* MW 2 */
+ 10245 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10247 "00011100" // /* MW 3 */
+ 10248 "11000110" // /* MW 2 */
+ 10249 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10251 "00011100" // /* MW 3 */
+ 10252 "11000110" // /* MW 2 */
+ 10253 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10255 "00011100" // /* MW 3 */
+ 10256 "11000110" // /* MW 2 */
+ 10257 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10259 "00011100" // /* MW 3 */
+ 10260 "11000110" // /* MW 2 */
+ 10261 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10263 "00011100" // /* MW 3 */
+ 10264 "11000110" // /* MW 2 */
+ 10265 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10267 "00011100" // /* MW 3 */
+ 10268 "11000110" // /* MW 2 */
+ 10269 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10271 "00011100" // /* MW 3 */
+ 10272 "11000110" // /* MW 2 */
+ 10273 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10275 "00011100" // /* MW 3 */
+ 10276 "11000110" // /* MW 2 */
+ 10277 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10279 "00011100" // /* MW 3 */
+ 10280 "11000110" // /* MW 2 */
+ 10281 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10283 "00011100" // /* MW 3 */
+ 10284 "11000110" // /* MW 2 */
+ 10285 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10287 "00011100" // /* MW 3 */
+ 10288 "11000110" // /* MW 2 */
+ 10289 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10291 "00011100" // /* MW 3 */
+ 10292 "11000110" // /* MW 2 */
+ 10293 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10295 "00011100" // /* MW 3 */
+ 10296 "11000110" // /* MW 2 */
+ 10297 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10299 "00011100" // /* MW 3 */
+ 10300 "11000110" // /* MW 2 */
+ 10301 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10303 "00011100" // /* MW 3 */
+ 10304 "11000110" // /* MW 2 */
+ 10305 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10307 "00011100" // /* MW 3 */
+ 10308 "11000110" // /* MW 2 */
+ 10309 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "00011100" // /* MW 3 */
+ 10312 "11000110" // /* MW 2 */
+ 10313 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10315 "00011100" // /* MW 3 */
+ 10316 "11000110" // /* MW 2 */
+ 10317 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10319 "00011100" // /* MW 3 */
+ 10320 "11000110" // /* MW 2 */
+ 10321 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10323 "00011100" // /* MW 3 */
+ 10324 "11000110" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "00011100" // /* MW 3 */
+ 10328 "11000110" // /* MW 2 */
+ 10329 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "00011100" // /* MW 3 */
+ 10332 "11000110" // /* MW 2 */
+ 10333 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10335 "00011100" // /* MW 3 */
+ 10336 "11000110" // /* MW 2 */
+ 10337 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10339 "00011100" // /* MW 3 */
+ 10340 "11000110" // /* MW 2 */
+ 10341 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 119 first
+ 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10343 "00000000" // /* MW 3 */
+ 10344 "00101000" // /* MW 2 */
+ 10345 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19 first
+.delay_slot
+ 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10347 "00011100" // /* MW 3 */
+ 10348 "11000110" // /* MW 2 */
+ 10349 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10351 "00011100" // /* MW 3 */
+ 10352 "11000110" // /* MW 2 */
+ 10353 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10355 "00011100" // /* MW 3 */
+ 10356 "11000110" // /* MW 2 */
+ 10357 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10359 "00011100" // /* MW 3 */
+ 10360 "11000110" // /* MW 2 */
+ 10361 "00010000" // /* MW 1 */
+.delay_slot
+ 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10363 "10100000" // /* MW 3 */
+ 10364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 10365 "00011000" // /* MW 1 */
+.label _ZL19propagateFloat32NaNjj
+.function propagateFloat32NaN _ZL19propagateFloat32NaNjj
+.src_ref 10 "softfloat-specialize" 78 24
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 143 4 first
+.function_start
+ 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10369 "00010000" // /* MW 9 */
+ 10370 "00000000" // /* MW 8 */
+ 10371 "01001000" // /* MW 7 */
+ 10372 "00000010" // /* MW 6 */
+ 10373 "11000000" // /* MW 5 */
+ 10374 "00111111" // /* MW 4 */
+ 10375 "00000000" // /* MW 3 */
+ 10376 "01000011" // /* MW 2 */
+ 10377 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6
+.src_ref 10 "softfloat-specialize" 141 6
+ 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10379 "00010000" // /* MW 9 */
+ 10380 "00000000" // /* MW 8 */
+ 10381 "00001000" // /* MW 7 */
+ 10382 "00000000" // /* MW 6 */
+ 10383 "00010000" // /* MW 5 */
+ 10384 "00000000" // /* MW 4 */
+ 10385 "00000000" // /* MW 3 */
+ 10386 "11100111" // /* MW 2 */
+ 10387 "00111111" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6 first
+ 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10389 "01011000" // /* MW 9 */
+ 10390 "11111110" // /* MW 8 */
+ 10391 "10101001" // /* MW 7 */
+ 10392 "00101100" // /* MW 6 */
+ 10393 "01000000" // /* MW 5 */
+ 10394 "00000010" // /* MW 4 */
+ 10395 "00000000" // /* MW 3 */
+ 10396 "00110000" // /* MW 2 */
+ 10397 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 141 6 first
+ 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10399 "00000101" // /* MW 3 */
+ 10400 "10000000" // /* MW 2 */
+ 10401 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10403 "00111101" // /* MW 3 */
+ 10404 "01001100" // /* MW 2 */
+ 10405 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10407 "00111101" // /* MW 3 */
+ 10408 "10000110" // /* MW 2 */
+ 10409 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10411 "00110100" // /* MW 3 */
+ 10412 "11000110" // /* MW 2 */
+ 10413 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10415 "01100100" // /* MW 3 */
+ 10416 "11001100" // /* MW 2 */
+ 10417 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10419 "01100111" // /* MW 3 */
+ 10420 "01001100" // /* MW 2 */
+ 10421 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38 first
+ 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10423 "00001101" // /* MW 3 */
+ 10424 "10100011" // /* MW 2 */
+ 10425 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 24
+ 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10427 "00011100" // /* MW 3 */
+ 10428 "10110111" // /* MW 2 */
+ 10429 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 62 first
+ 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10431 "00000010" // /* MW 3 */
+ 10432 "00100010" // /* MW 2 */
+ 10433 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+ 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10435 "11111110" // /* MW 5 */
+ 10436 "00111111" // /* MW 4 */
+ 10437 "11111000" // /* MW 3 */
+ 10438 "00111111" // /* MW 2 */
+ 10439 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10441 "00000100" // /* MW 3 */
+ 10442 "10000101" // /* MW 2 */
+ 10443 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10445 "11110000" // /* MW 3 */
+ 10446 "10000100" // /* MW 2 */
+ 10447 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10449 "00000100" // /* MW 3 */
+ 10450 "01000011" // /* MW 2 */
+ 10451 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10453 "11110000" // /* MW 3 */
+ 10454 "01000010" // /* MW 2 */
+ 10455 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 4 first
+ 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10457 "00000000" // /* MW 3 */
+ 10458 "00101000" // /* MW 2 */
+ 10459 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+.delay_slot
+ 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10461 "01100100" // /* MW 3 */
+ 10462 "01110110" // /* MW 2 */
+ 10463 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10465 "01010111" // /* MW 3 */
+ 10466 "11000010" // /* MW 2 */
+ 10467 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 49 first
+.delay_slot
+ 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10469 "01000010" // /* MW 3 */
+ 10470 "01000110" // /* MW 2 */
+ 10471 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10473 "00100100" // /* MW 3 */
+ 10474 "01110110" // /* MW 2 */
+ 10475 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 27 first
+.delay_slot
+ 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10477 "00000010" // /* MW 3 */
+ 10478 "11000000" // /* MW 2 */
+.label _ZL19propagateFloat32NaNjj__end
+ 10479 "00010000" // /* MW 1 */
+.label _ZL19roundAndPackFloat32iij
+.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 154 first
+.src_ref 10 "softfloat.c" 161 19
+.src_ref 10 "softfloat.c" 203 30
+.function_start
+ 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10481 "00010000" // /* MW 9 */
+ 10482 "01111010" // /* MW 8 */
+ 10483 "00110010" // /* MW 7 */
+ 10484 "11110000" // /* MW 6 */
+ 10485 "00000001" // /* MW 5 */
+ 10486 "00000000" // /* MW 4 */
+ 10487 "00000000" // /* MW 3 */
+ 10488 "00000000" // /* MW 2 */
+ 10489 "00001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 161 19 first
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 178 21
+.src_ref 10 "softfloat.c" 194 29
+ 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10491 "11111010" // /* MW 5 */
+ 10492 "10011001" // /* MW 4 */
+ 10493 "11010000" // /* MW 3 */
+ 10494 "10010010" // /* MW 2 */
+ 10495 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10501 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10503 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10505 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10507 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 162 36 first
+.src_ref 10 "softfloat.c" 164 4 first
+ 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */
+ 10509 "00000001" // /* MW 5 */
+ 10510 "00000000" // /* MW 4 */
+ 10511 "10101000" // /* MW 3 */
+ 10512 "00010100" // /* MW 2 */
+ 10513 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 40
+.src_ref 10 "softfloat.c" 185 68
+.src_ref 10 "softfloat.c" 202 18
+.delay_slot
+ 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10515 "00000001" // /* MW 3 */
+ 10516 "01001010" // /* MW 2 */
+ 10517 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10525 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 171 34
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 174 34
+ 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10527 "01011000" // /* MW 9 */
+ 10528 "00000000" // /* MW 8 */
+ 10529 "00001000" // /* MW 7 */
+ 10530 "01001011" // /* MW 6 */
+ 10531 "01110000" // /* MW 5 */
+ 10532 "00000000" // /* MW 4 */
+ 10533 "00000000" // /* MW 3 */
+ 10534 "01110000" // /* MW 2 */
+ 10535 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26
+.src_ref 10 "softfloat.c" 171 34 first
+ 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10537 "00000101" // /* MW 5 */
+ 10538 "10100000" // /* MW 4 */
+ 10539 "11110010" // /* MW 3 */
+ 10540 "11001000" // /* MW 2 */
+ 10541 "00111110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 171 16
+ 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10543 "10000010" // /* MW 3 */
+ 10544 "10001111" // /* MW 2 */
+ 10545 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 174 34 first
+ 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10547 "00000111" // /* MW 3 */
+ 10548 "00110111" // /* MW 2 */
+ 10549 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12
+.src_ref 10 "softfloat.c" 174 16
+ 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10551 "01000001" // /* MW 5 */
+ 10552 "10100001" // /* MW 4 */
+ 10553 "01001101" // /* MW 3 */
+ 10554 "00110000" // /* MW 2 */
+ 10555 "00110100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12 first
+.src_ref 10 "softfloat.c" 170 12 first
+ 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10557 "01110010" // /* MW 3 */
+ 10558 "00001110" // /* MW 2 */
+ 10559 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26 first
+ 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10561 "01000111" // /* MW 3 */
+ 10562 "01110110" // /* MW 2 */
+ 10563 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+ 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10565 "10000001" // /* MW 11 */
+ 10566 "10101101" // /* MW 10 */
+ 10567 "00000000" // /* MW 9 */
+ 10568 "00010000" // /* MW 8 */
+ 10569 "01011100" // /* MW 7 */
+ 10570 "00001110" // /* MW 6 */
+ 10571 "00100000" // /* MW 5 */
+ 10572 "00000000" // /* MW 4 */
+ 10573 "11110000" // /* MW 3 */
+ 10574 "00101100" // /* MW 2 */
+ 10575 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+.src_ref 10 "softfloat.c" 179 14
+.src_ref 10 "softfloat.c" 179 17 first
+.src_ref 10 "softfloat.c" 180 23
+.src_ref 10 "softfloat.c" 181 28
+ 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10577 "11110101" // /* MW 5 */
+ 10578 "00100011" // /* MW 4 */
+ 10579 "00001000" // /* MW 3 */
+ 10580 "10010110" // /* MW 2 */
+ 10581 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 14
+ 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10583 "00001010" // /* MW 3 */
+ 10584 "10100101" // /* MW 2 */
+ 10585 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 4
+ 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */
+ 10587 "00000001" // /* MW 5 */
+ 10588 "01000000" // /* MW 4 */
+ 10589 "00001000" // /* MW 3 */
+ 10590 "00010101" // /* MW 2 */
+ 10591 "10010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 178 21 first
+.delay_slot
+ 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10593 "01100100" // /* MW 3 */
+ 10594 "11100010" // /* MW 2 */
+ 10595 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.delay_slot
+ 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10597 "01111101" // /* MW 3 */
+ 10598 "00001110" // /* MW 2 */
+ 10599 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10601 "01111101" // /* MW 3 */
+ 10602 "01000010" // /* MW 2 */
+ 10603 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10607 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 28 first
+.src_ref 10 "softfloat.c" 182 40 first
+.src_ref 10 "softfloat.c" 182 59
+ 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10609 "10101000" // /* MW 9 */
+ 10610 "11001010" // /* MW 8 */
+ 10611 "10001000" // /* MW 7 */
+ 10612 "00111110" // /* MW 6 */
+ 10613 "00111000" // /* MW 5 */
+ 10614 "00000101" // /* MW 4 */
+ 10615 "00000000" // /* MW 3 */
+ 10616 "00010010" // /* MW 2 */
+ 10617 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 59
+ 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10619 "00101010" // /* MW 3 */
+ 10620 "00101001" // /* MW 2 */
+ 10621 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 23 first
+ 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10623 "00101010" // /* MW 3 */
+ 10624 "00100000" // /* MW 2 */
+ 10625 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 18 first
+ 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10627 "01000100" // /* MW 3 */
+ 10628 "11100111" // /* MW 2 */
+ 10629 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 13 first
+ 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10631 "00000101" // /* MW 3 */
+ 10632 "11100111" // /* MW 2 */
+ 10633 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 8 first
+ 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */
+ 10635 "00000001" // /* MW 5 */
+ 10636 "01000000" // /* MW 4 */
+ 10637 "00110000" // /* MW 3 */
+ 10638 "00010101" // /* MW 2 */
+ 10639 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 187 18
+.src_ref 10 "softfloat.c" 192 39
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10641 "00000001" // /* MW 3 */
+ 10642 "00100000" // /* MW 2 */
+ 10643 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10651 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 18 first
+ 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10653 "00001001" // /* MW 3 */
+ 10654 "10100111" // /* MW 2 */
+ 10655 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 8
+ 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */
+ 10657 "00000001" // /* MW 5 */
+ 10658 "01000000" // /* MW 4 */
+ 10659 "00010000" // /* MW 3 */
+ 10660 "00010101" // /* MW 2 */
+ 10661 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10671 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 192 39 first
+ 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10673 "00100001" // /* MW 3 */
+ 10674 "00000100" // /* MW 2 */
+ 10675 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */
+ 10677 "00000001" // /* MW 5 */
+ 10678 "00000000" // /* MW 4 */
+ 10679 "11111000" // /* MW 3 */
+ 10680 "00010100" // /* MW 2 */
+ 10681 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10683 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10685 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10687 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10691 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10693 "10000001" // /* MW 5 */
+ 10694 "10100000" // /* MW 4 */
+ 10695 "00111001" // /* MW 3 */
+ 10696 "01000100" // /* MW 2 */
+ 10697 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10699 "00010100" // /* MW 3 */
+ 10700 "11001111" // /* MW 2 */
+ 10701 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10703 "01111101" // /* MW 3 */
+ 10704 "11001110" // /* MW 2 */
+ 10705 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10707 "00011101" // /* MW 3 */
+ 10708 "11100011" // /* MW 2 */
+ 10709 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10711 "00111010" // /* MW 3 */
+ 10712 "10110111" // /* MW 2 */
+ 10713 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10715 "11110000" // /* MW 3 */
+ 10716 "11001110" // /* MW 2 */
+ 10717 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10719 "11110000" // /* MW 3 */
+ 10720 "11000110" // /* MW 2 */
+ 10721 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10723 "00010101" // /* MW 3 */
+ 10724 "11000101" // /* MW 2 */
+ 10725 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10727 "00100010" // /* MW 9 */
+ 10728 "11000110" // /* MW 8 */
+ 10729 "00000000" // /* MW 7 */
+ 10730 "00000000" // /* MW 6 */
+ 10731 "01011011" // /* MW 5 */
+ 10732 "00000001" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */
+ 10737 "00000000" // /* MW 5 */
+ 10738 "00000000" // /* MW 4 */
+ 10739 "00010000" // /* MW 3 */
+ 10740 "00010101" // /* MW 2 */
+ 10741 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 194 29 first
+.delay_slot
+ 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10743 "01100100" // /* MW 3 */
+ 10744 "11100010" // /* MW 2 */
+ 10745 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10747 "00000001" // /* MW 3 */
+ 10748 "00000100" // /* MW 2 */
+ 10749 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10755 "00011100" // /* MW 13 */
+ 10756 "00000000" // /* MW 12 */
+ 10757 "00000000" // /* MW 11 */
+ 10758 "01010111" // /* MW 10 */
+ 10759 "00011010" // /* MW 9 */
+ 10760 "01000000" // /* MW 8 */
+ 10761 "00000000" // /* MW 7 */
+ 10762 "00000000" // /* MW 6 */
+ 10763 "10110110" // /* MW 5 */
+ 10764 "00000010" // /* MW 4 */
+ 10765 "11110000" // /* MW 3 */
+ 10766 "00101100" // /* MW 2 */
+ 10767 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+ 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10769 "00000000" // /* MW 15 */
+ 10770 "00000000" // /* MW 14 */
+ 10771 "01111000" // /* MW 13 */
+ 10772 "10100101" // /* MW 12 */
+ 10773 "00000001" // /* MW 11 */
+ 10774 "00001000" // /* MW 10 */
+ 10775 "00000000" // /* MW 9 */
+ 10776 "00000001" // /* MW 8 */
+ 10777 "01011011" // /* MW 7 */
+ 10778 "00000001" // /* MW 6 */
+ 10779 "00100000" // /* MW 5 */
+ 10780 "00000000" // /* MW 4 */
+ 10781 "11110000" // /* MW 3 */
+ 10782 "00101100" // /* MW 2 */
+ 10783 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+.src_ref 10 "softfloat.c" 202 18 first
+.src_ref 10 "softfloat.c" 202 36
+.src_ref 10 "softfloat.c" 203 30 first
+ 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10785 "10101000" // /* MW 9 */
+ 10786 "11001010" // /* MW 8 */
+ 10787 "10101000" // /* MW 7 */
+ 10788 "00110100" // /* MW 6 */
+ 10789 "00110000" // /* MW 5 */
+ 10790 "00100010" // /* MW 4 */
+ 10791 "00000000" // /* MW 3 */
+ 10792 "00100000" // /* MW 2 */
+ 10793 "11111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59
+.src_ref 10 "softfloat.c" 203 12
+.src_ref 10 "softfloat.c" 203 46
+ 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10795 "01011000" // /* MW 9 */
+ 10796 "11111111" // /* MW 8 */
+ 10797 "10001111" // /* MW 7 */
+ 10798 "00101100" // /* MW 6 */
+ 10799 "01100010" // /* MW 5 */
+ 10800 "00000110" // /* MW 4 */
+ 10801 "00000000" // /* MW 3 */
+ 10802 "11100011" // /* MW 2 */
+ 10803 "00000010" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 46
+ 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10805 "11010000" // /* MW 3 */
+ 10806 "10001100" // /* MW 2 */
+ 10807 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 202 36
+ 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00001101" // /* MW 3 */
+ 10810 "01000000" // /* MW 2 */
+ 10811 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 12
+ 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10813 "01000110" // /* MW 3 */
+ 10814 "10001000" // /* MW 2 */
+ 10815 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 205 4 first
+ 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10817 "00000000" // /* MW 3 */
+ 10818 "00101000" // /* MW 2 */
+ 10819 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 9 first
+.delay_slot
+ 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10821 "00000100" // /* MW 3 */
+ 10822 "00110110" // /* MW 2 */
+ 10823 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4 first
+.src_ref 10 "softfloat.c" 204 14 first
+.delay_slot
+ 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10825 "00100010" // /* MW 3 */
+ 10826 "00000100" // /* MW 2 */
+ 10827 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59 first
+.delay_slot
+ 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10829 "00111101" // /* MW 3 */
+ 10830 "10000100" // /* MW 2 */
+ 10831 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10833 "00100000" // /* MW 3 */
+ 10834 "01000100" // /* MW 2 */
+ 10835 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66
+.delay_slot
+ 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10837 "10000001" // /* MW 11 */
+ 10838 "10101101" // /* MW 10 */
+ 10839 "00000000" // /* MW 9 */
+ 10840 "00000100" // /* MW 8 */
+ 10841 "00000001" // /* MW 7 */
+ 10842 "00110110" // /* MW 6 */
+ 10843 "00100000" // /* MW 5 */
+ 10844 "00000000" // /* MW 4 */
+ 10845 "11110000" // /* MW 3 */
+ 10846 "00101100" // /* MW 2 */
+ 10847 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+.src_ref 10 "softfloat.c" 185 12 first
+ 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10849 "00000000" // /* MW 3 */
+ 10850 "00101000" // /* MW 2 */
+ 10851 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10853 "00000000" // /* MW 5 */
+ 10854 "00100000" // /* MW 4 */
+ 10855 "00000001" // /* MW 3 */
+ 10856 "10000000" // /* MW 2 */
+ 10857 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10859 "00100000" // /* MW 3 */
+ 10860 "01000110" // /* MW 2 */
+ 10861 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 68 first
+.delay_slot
+ 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10863 "11010000" // /* MW 3 */
+ 10864 "01000100" // /* MW 2 */
+ 10865 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 49
+.delay_slot
+ 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10867 "00100001" // /* MW 3 */
+ 10868 "11000000" // /* MW 2 */
+ 10869 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19roundAndPackFloat32iij__end
+ 10871 "00000000" // /* MW 1 */
+.label _ZL28normalizeRoundAndPackFloat32iij
+.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 218 first
+.src_ref 10 "softfloat.c" 224 11 first
+.tail_call
+.function_start
+ 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10881 "00000000" // /* MW 5 */
+ 10882 "00000000" // /* MW 4 */
+ 10883 "01111000" // /* MW 3 */
+ 10884 "00010100" // /* MW 2 */
+ 10885 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 552 53 first
+.delay_slot
+ 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10887 "00110000" // /* MW 3 */
+ 10888 "11100000" // /* MW 2 */
+ 10889 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 223 45 first
+.delay_slot
+ 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10891 "11111111" // /* MW 3 */
+ 10892 "00100001" // /* MW 2 */
+ 10893 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 44 first
+.delay_slot
+ 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10895 "00000001" // /* MW 3 */
+ 10896 "10000101" // /* MW 2 */
+ 10897 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 62
+.delay_slot
+ 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10899 "00001101" // /* MW 3 */
+ 10900 "11000111" // /* MW 2 */
+ 10901 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+ 10903 "00000000" // /* MW 1 */
+.label int32_to_float32
+.function int32_to_float32 int32_to_float32
+.src_ref 10 "softfloat.c" 477 first
+.src_ref 10 "softfloat.c" 481 4
+.src_ref 10 "softfloat.c" 481 11 first
+.function_start
+ 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */
+ 10913 "00000001" // /* MW 5 */
+ 10914 "00000000" // /* MW 4 */
+ 10915 "01111000" // /* MW 3 */
+ 10916 "00010101" // /* MW 2 */
+ 10917 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10919 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10927 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11
+ 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10929 "00000000" // /* MW 5 */
+ 10930 "00100000" // /* MW 4 */
+ 10931 "00001000" // /* MW 3 */
+ 10932 "00000000" // /* MW 2 */
+ 10933 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11 first
+ 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10935 "00000111" // /* MW 3 */
+ 10936 "01100001" // /* MW 2 */
+ 10937 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 4
+ 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */
+ 10939 "00000001" // /* MW 5 */
+ 10940 "01000000" // /* MW 4 */
+ 10941 "10000000" // /* MW 3 */
+ 10942 "00010101" // /* MW 2 */
+ 10943 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10953 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 11
+.src_ref 10 "softfloat.c" 484 11 first
+.tail_call
+ 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 10955 "00100000" // /* MW 9 */
+ 10956 "00000000" // /* MW 8 */
+ 10957 "00000000" // /* MW 7 */
+ 10958 "01010000" // /* MW 6 */
+ 10959 "00000101" // /* MW 5 */
+ 10960 "00000000" // /* MW 4 */
+ 10961 "00000000" // /* MW 3 */
+ 10962 "10000010" // /* MW 2 */
+ 10963 "00010011" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 60
+.src_ref 10 "softfloat.c" 484 62
+.delay_slot
+ 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00010000" // /* MW 3 */
+ 10966 "01000111" // /* MW 2 */
+ 10967 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16
+.delay_slot
+ 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10969 "00000001" // /* MW 3 */
+ 10970 "00100000" // /* MW 2 */
+ 10971 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16 first
+.delay_slot
+ 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10973 "00001010" // /* MW 3 */
+ 10974 "01000011" // /* MW 2 */
+ 10975 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10979 "00011100" // /* MW 13 */
+ 10980 "00000000" // /* MW 12 */
+ 10981 "00000000" // /* MW 11 */
+ 10982 "01010111" // /* MW 10 */
+ 10983 "00011010" // /* MW 9 */
+ 10984 "01000000" // /* MW 8 */
+ 10985 "00000000" // /* MW 7 */
+ 10986 "00000000" // /* MW 6 */
+ 10987 "10110110" // /* MW 5 */
+ 10988 "00000010" // /* MW 4 */
+ 10989 "11110000" // /* MW 3 */
+ 10990 "00101100" // /* MW 2 */
+ 10991 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_80
+.src_ref 10 "softfloat.c" 481 18 first
+.return_address
+ 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10993 "00000000" // /* MW 3 */
+ 10994 "00101000" // /* MW 2 */
+ 10995 "00010000" // /* MW 1 */
+.delay_slot
+ 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10997 "00000001" // /* MW 3 */
+ 10998 "00000000" // /* MW 2 */
+ 10999 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11001 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11003 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11007 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_96
+.src_ref 10 "softfloat.c" 482 37 first
+ 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000000" // /* MW 3 */
+ 11010 "00101000" // /* MW 2 */
+ 11011 "00010000" // /* MW 1 */
+.delay_slot
+ 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11013 "00000000" // /* MW 5 */
+ 11014 "00100000" // /* MW 4 */
+ 11015 "00000000" // /* MW 3 */
+ 11016 "00000000" // /* MW 2 */
+ 11017 "11001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11019 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label int32_to_float32__end
+ 11025 "00000000" // /* MW 1 */
+.label _ZL14addFloat32Sigsjji
+.function addFloat32Sigs _ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 734 first
+.function_start
+ 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11041 "10010000" // /* MW 9 */
+ 11042 "11111111" // /* MW 8 */
+ 11043 "00001111" // /* MW 7 */
+ 11044 "11111110" // /* MW 6 */
+ 11045 "00011111" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00110010" // /* MW 2 */
+ 11049 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00101101" // /* MW 3 */
+ 11052 "01100011" // /* MW 2 */
+ 11053 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11055 "00101101" // /* MW 3 */
+ 11056 "10001001" // /* MW 2 */
+ 11057 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11059 "10010000" // /* MW 3 */
+ 11060 "01110110" // /* MW 2 */
+ 11061 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "10010000" // /* MW 3 */
+ 11064 "00110010" // /* MW 2 */
+ 11065 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 744 19 first
+.src_ref 10 "softfloat.c" 747 11
+.src_ref 10 "softfloat.c" 761 22
+.src_ref 10 "softfloat.c" 772 35
+.src_ref 10 "softfloat.c" 788 24
+ 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11067 "00000001" // /* MW 5 */
+ 11068 "00100000" // /* MW 4 */
+ 11069 "00111100" // /* MW 3 */
+ 11070 "01110010" // /* MW 2 */
+ 11071 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 11 first
+ 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00011010" // /* MW 3 */
+ 11074 "00001001" // /* MW 2 */
+ 11075 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 4
+ 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */
+ 11077 "00000001" // /* MW 5 */
+ 11078 "01000000" // /* MW 4 */
+ 11079 "11111000" // /* MW 3 */
+ 11080 "00010101" // /* MW 2 */
+ 11081 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.delay_slot
+ 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00000100" // /* MW 3 */
+ 11084 "01100111" // /* MW 2 */
+ 11085 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 745 9
+.src_ref 10 "softfloat.c" 746 9
+.delay_slot
+ 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11087 "00011001" // /* MW 5 */
+ 11088 "00100000" // /* MW 4 */
+ 11089 "10010000" // /* MW 3 */
+ 11090 "00100000" // /* MW 2 */
+ 11091 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 745 9 first
+.delay_slot
+ 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "00001101" // /* MW 3 */
+ 11094 "11100110" // /* MW 2 */
+ 11095 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 746 9 first
+.src_ref 10 "softfloat.c" 748 18
+.src_ref 10 "softfloat.c" 762 18
+.delay_slot
+ 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11097 "11111101" // /* MW 5 */
+ 11098 "00100011" // /* MW 4 */
+ 11099 "10111010" // /* MW 3 */
+ 11100 "00000001" // /* MW 2 */
+ 11101 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.src_ref 10 "softfloat.c" 748 18 first
+.delay_slot
+ 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11103 "01111101" // /* MW 5 */
+ 11104 "00100000" // /* MW 4 */
+ 11105 "11111001" // /* MW 3 */
+ 11106 "00101000" // /* MW 2 */
+ 11107 "11011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 22 first
+ 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11109 "10001001" // /* MW 3 */
+ 11110 "01001011" // /* MW 2 */
+ 11111 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 9
+ 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */
+ 11113 "00000001" // /* MW 5 */
+ 11114 "01000000" // /* MW 4 */
+ 11115 "01011000" // /* MW 3 */
+ 11116 "00010110" // /* MW 2 */
+ 11117 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "00101101" // /* MW 3 */
+ 11120 "11001001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11129 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 18 first
+ 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11131 "01000111" // /* MW 3 */
+ 11132 "01101001" // /* MW 2 */
+ 11133 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 8
+ 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */
+ 11135 "00000001" // /* MW 5 */
+ 11136 "01000000" // /* MW 4 */
+ 11137 "01000000" // /* MW 3 */
+ 11138 "00010110" // /* MW 2 */
+ 11139 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10100000" // /* MW 3 */
+ 11152 "01010001" // /* MW 2 */
+ 11153 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 787 4
+ 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11155 "10100000" // /* MW 3 */
+ 11156 "10011100" // /* MW 2 */
+ 11157 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 767 12 first
+ 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11159 "00000111" // /* MW 3 */
+ 11160 "01000000" // /* MW 2 */
+ 11161 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+ 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11163 "00010010" // /* MW 3 */
+ 11164 "00100011" // /* MW 2 */
+ 11165 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 772 35 first
+ 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11167 "00010001" // /* MW 3 */
+ 11168 "00100011" // /* MW 2 */
+ 11169 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11171 "00000001" // /* MW 5 */
+ 11172 "00000000" // /* MW 4 */
+ 11173 "00101000" // /* MW 3 */
+ 11174 "00010110" // /* MW 2 */
+ 11175 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11177 "00000000" // /* MW 5 */
+ 11178 "00100000" // /* MW 4 */
+ 11179 "00001010" // /* MW 3 */
+ 11180 "00000000" // /* MW 2 */
+ 11181 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17 first
+.delay_slot
+ 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11183 "01000101" // /* MW 3 */
+ 11184 "11000111" // /* MW 2 */
+ 11185 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+.delay_slot
+ 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11187 "00110010" // /* MW 3 */
+ 11188 "11100110" // /* MW 2 */
+ 11189 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11193 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11195 "10000001" // /* MW 5 */
+ 11196 "00100000" // /* MW 4 */
+ 11197 "00110000" // /* MW 3 */
+ 11198 "11100010" // /* MW 2 */
+ 11199 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11201 "00100100" // /* MW 3 */
+ 11202 "11100101" // /* MW 2 */
+ 11203 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11205 "00101101" // /* MW 3 */
+ 11206 "11100101" // /* MW 2 */
+ 11207 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11209 "00001010" // /* MW 3 */
+ 11210 "01110110" // /* MW 2 */
+ 11211 "00010100" // /* MW 1 */
+ 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */
+ 11213 "00000000" // /* MW 5 */
+ 11214 "00000000" // /* MW 4 */
+ 11215 "00101000" // /* MW 3 */
+ 11216 "00010110" // /* MW 2 */
+ 11217 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+.delay_slot
+ 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11219 "00111101" // /* MW 3 */
+ 11220 "11000110" // /* MW 2 */
+ 11221 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+.delay_slot
+ 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11223 "11110000" // /* MW 3 */
+ 11224 "10100100" // /* MW 2 */
+ 11225 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+.delay_slot
+ 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11227 "11110000" // /* MW 3 */
+ 11228 "11100010" // /* MW 2 */
+ 11229 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+.delay_slot
+ 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11231 "00100101" // /* MW 3 */
+ 11232 "11100101" // /* MW 2 */
+ 11233 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+.delay_slot
+ 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11235 "01100000" // /* MW 13 */
+ 11236 "00101011" // /* MW 12 */
+ 11237 "00000000" // /* MW 11 */
+ 11238 "10101111" // /* MW 10 */
+ 11239 "00110100" // /* MW 9 */
+ 11240 "00000000" // /* MW 8 */
+ 11241 "00100010" // /* MW 7 */
+ 11242 "01100111" // /* MW 6 */
+ 11243 "00100100" // /* MW 5 */
+ 11244 "00000000" // /* MW 4 */
+ 11245 "11110000" // /* MW 3 */
+ 11246 "00101100" // /* MW 2 */
+ 11247 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_208
+.src_ref 10 "softfloat.c" 748 8 first
+ 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */
+ 11249 "00000001" // /* MW 5 */
+ 11250 "01000000" // /* MW 4 */
+ 11251 "01111000" // /* MW 3 */
+ 11252 "00010110" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11255 "00000000" // /* MW 5 */
+ 11256 "00100000" // /* MW 4 */
+ 11257 "00001010" // /* MW 3 */
+ 11258 "00000000" // /* MW 2 */
+ 11259 "00100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11263 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11265 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11267 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11269 "10100000" // /* MW 3 */
+ 11270 "01010001" // /* MW 2 */
+ 11271 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 753 12 first
+.src_ref 10 "softfloat.c" 787 4
+ 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11273 "01000001" // /* MW 5 */
+ 11274 "00111011" // /* MW 4 */
+ 11275 "11100001" // /* MW 3 */
+ 11276 "11111111" // /* MW 2 */
+ 11277 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8
+.src_ref 10 "softfloat.c" 752 18
+ 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "10100000" // /* MW 3 */
+ 11280 "11011100" // /* MW 2 */
+ 11281 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+ 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010010" // /* MW 3 */
+ 11284 "11100011" // /* MW 2 */
+ 11285 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11287 "00000001" // /* MW 5 */
+ 11288 "00000000" // /* MW 4 */
+ 11289 "00101000" // /* MW 3 */
+ 11290 "00010110" // /* MW 2 */
+ 11291 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17 first
+.delay_slot
+ 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11293 "00000101" // /* MW 3 */
+ 11294 "00000001" // /* MW 2 */
+ 11295 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+.delay_slot
+ 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00000010" // /* MW 3 */
+ 11298 "00100000" // /* MW 2 */
+ 11299 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11301 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11303 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11305 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11307 "10000001" // /* MW 5 */
+ 11308 "00100000" // /* MW 4 */
+ 11309 "00110000" // /* MW 3 */
+ 11310 "11100010" // /* MW 2 */
+ 11311 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11313 "00100100" // /* MW 3 */
+ 11314 "11100101" // /* MW 2 */
+ 11315 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11317 "00101101" // /* MW 3 */
+ 11318 "00100101" // /* MW 2 */
+ 11319 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11321 "00111101" // /* MW 3 */
+ 11322 "00000110" // /* MW 2 */
+ 11323 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11325 "00001010" // /* MW 3 */
+ 11326 "01110110" // /* MW 2 */
+ 11327 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11329 "11110000" // /* MW 3 */
+ 11330 "10100100" // /* MW 2 */
+ 11331 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11333 "11110000" // /* MW 3 */
+ 11334 "00100000" // /* MW 2 */
+ 11335 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11337 "00100101" // /* MW 3 */
+ 11338 "11100011" // /* MW 2 */
+ 11339 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11341 "00010010" // /* MW 3 */
+ 11342 "00100001" // /* MW 2 */
+ 11343 "00010100" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_304
+.src_ref 10 "softfloat.c" 785 9 first
+.src_ref 10 "softfloat.c" 786 26
+.src_ref 10 "softfloat.c" 787 4 first
+ 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11345 "11001000" // /* MW 9 */
+ 11346 "10111111" // /* MW 8 */
+ 11347 "00101000" // /* MW 7 */
+ 11348 "00101110" // /* MW 6 */
+ 11349 "00111010" // /* MW 5 */
+ 11350 "00100111" // /* MW 4 */
+ 11351 "00000000" // /* MW 3 */
+ 11352 "00110010" // /* MW 2 */
+ 11353 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 18 first
+.src_ref 10 "softfloat.c" 790 8 first
+ 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11355 "00000001" // /* MW 5 */
+ 11356 "00110001" // /* MW 4 */
+ 11357 "00011000" // /* MW 3 */
+ 11358 "11100000" // /* MW 2 */
+ 11359 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 26
+ 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11361 "00101101" // /* MW 3 */
+ 11362 "11100101" // /* MW 2 */
+ 11363 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 24 first
+ 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11365 "10001010" // /* MW 3 */
+ 11366 "10110111" // /* MW 2 */
+ 11367 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11369 "00000010" // /* MW 3 */
+ 11370 "01000101" // /* MW 2 */
+ 11371 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11373 "00110010" // /* MW 3 */
+ 11374 "10000111" // /* MW 2 */
+ 11375 "00010100" // /* MW 1 */
+.label __ll1__ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 793 11 first
+.tail_call
+ 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 11377 "00000000" // /* MW 5 */
+ 11378 "00000000" // /* MW 4 */
+ 11379 "01111000" // /* MW 3 */
+ 11380 "00010100" // /* MW 2 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11391 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.src_ref 10 "softfloat.c" 763 12 first
+.return_address
+ 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */
+ 11393 "00000001" // /* MW 5 */
+ 11394 "01000000" // /* MW 4 */
+ 11395 "10001000" // /* MW 3 */
+ 11396 "00010110" // /* MW 2 */
+ 11397 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11407 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 764 12 first
+ 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11409 "00000000" // /* MW 3 */
+ 11410 "00101000" // /* MW 2 */
+ 11411 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11413 "00000000" // /* MW 5 */
+ 11414 "00100000" // /* MW 4 */
+ 11415 "00001000" // /* MW 3 */
+ 11416 "10000000" // /* MW 2 */
+ 11417 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11419 "00000000" // /* MW 3 */
+ 11420 "00000001" // /* MW 2 */
+ 11421 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11427 "00011100" // /* MW 13 */
+ 11428 "00000000" // /* MW 12 */
+ 11429 "00000000" // /* MW 11 */
+ 11430 "01010111" // /* MW 10 */
+ 11431 "00011010" // /* MW 9 */
+ 11432 "01000000" // /* MW 8 */
+ 11433 "00000000" // /* MW 7 */
+ 11434 "00000000" // /* MW 6 */
+ 11435 "10110110" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "11110000" // /* MW 3 */
+ 11438 "00101100" // /* MW 2 */
+ 11439 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 776 8 first
+ 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */
+ 11441 "00000001" // /* MW 5 */
+ 11442 "01000000" // /* MW 4 */
+ 11443 "10010000" // /* MW 3 */
+ 11444 "00010110" // /* MW 2 */
+ 11445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11455 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 8 first
+ 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */
+ 11457 "00000001" // /* MW 5 */
+ 11458 "00000000" // /* MW 4 */
+ 11459 "10101000" // /* MW 3 */
+ 11460 "00010110" // /* MW 2 */
+ 11461 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11471 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11473 "10100000" // /* MW 3 */
+ 11474 "01010001" // /* MW 2 */
+ 11475 "00011000" // /* MW 1 */
+ 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */
+ 11477 "00000000" // /* MW 5 */
+ 11478 "00000000" // /* MW 4 */
+ 11479 "00111000" // /* MW 3 */
+ 11480 "00010110" // /* MW 2 */
+ 11481 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26
+.delay_slot
+ 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11483 "00000000" // /* MW 5 */
+ 11484 "10100000" // /* MW 4 */
+ 11485 "00001000" // /* MW 3 */
+ 11486 "00000000" // /* MW 2 */
+ 11487 "01000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26 first
+.src_ref 10 "softfloat.c" 793 11
+.delay_slot
+ 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "01000001" // /* MW 5 */
+ 11490 "00111011" // /* MW 4 */
+ 11491 "00010001" // /* MW 3 */
+ 11492 "01100010" // /* MW 2 */
+ 11493 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 33
+.delay_slot
+ 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11495 "00000000" // /* MW 3 */
+ 11496 "01000111" // /* MW 2 */
+ 11497 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11501 "01100111" // /* MW 3 */
+ 11502 "00000001" // /* MW 2 */
+ 11503 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_464
+.src_ref 10 "softfloat.c" 749 12 first
+ 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */
+ 11505 "00000001" // /* MW 5 */
+ 11506 "01000000" // /* MW 4 */
+ 11507 "10111000" // /* MW 3 */
+ 11508 "00010110" // /* MW 2 */
+ 11509 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11519 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 750 12 first
+ 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11521 "00000000" // /* MW 3 */
+ 11522 "00101000" // /* MW 2 */
+ 11523 "00010000" // /* MW 1 */
+.delay_slot
+ 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "10100000" // /* MW 3 */
+ 11526 "00010000" // /* MW 2 */
+ 11527 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11535 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.src_ref 10 "softfloat.c" 763 31 first
+.tail_call
+ 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11537 "00000000" // /* MW 5 */
+ 11538 "00000000" // /* MW 4 */
+ 11539 "01000000" // /* MW 3 */
+ 11540 "00010100" // /* MW 2 */
+ 11541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11551 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 777 22 first
+.return_address
+ 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11553 "00000101" // /* MW 3 */
+ 11554 "11100001" // /* MW 2 */
+ 11555 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 777 12
+ 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */
+ 11557 "00000001" // /* MW 5 */
+ 11558 "01000000" // /* MW 4 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "00010110" // /* MW 2 */
+ 11561 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11567 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11571 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 778 12 first
+ 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11573 "00000000" // /* MW 3 */
+ 11574 "00101000" // /* MW 2 */
+ 11575 "00010000" // /* MW 1 */
+.delay_slot
+ 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11577 "10100000" // /* MW 3 */
+ 11578 "00010000" // /* MW 2 */
+ 11579 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11583 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11585 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11587 "00011100" // /* MW 13 */
+ 11588 "00000000" // /* MW 12 */
+ 11589 "00000000" // /* MW 11 */
+ 11590 "01010111" // /* MW 10 */
+ 11591 "00011010" // /* MW 9 */
+ 11592 "01000000" // /* MW 8 */
+ 11593 "00000000" // /* MW 7 */
+ 11594 "00000000" // /* MW 6 */
+ 11595 "10110110" // /* MW 5 */
+ 11596 "00000010" // /* MW 4 */
+ 11597 "11110000" // /* MW 3 */
+ 11598 "00101100" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 780 25 first
+.src_ref 10 "softfloat.c" 780 62 first
+ 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11601 "10000010" // /* MW 5 */
+ 11602 "00110011" // /* MW 4 */
+ 11603 "00001000" // /* MW 3 */
+ 11604 "00000000" // /* MW 2 */
+ 11605 "00000101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11607 "11101001" // /* MW 3 */
+ 11608 "11100010" // /* MW 2 */
+ 11609 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11611 "00011101" // /* MW 3 */
+ 11612 "00100001" // /* MW 2 */
+ 11613 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66 first
+.delay_slot
+ 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11615 "00000000" // /* MW 3 */
+ 11616 "00000001" // /* MW 2 */
+ 11617 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 11621 "10000001" // /* MW 11 */
+ 11622 "10101101" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "00000000" // /* MW 7 */
+ 11626 "00000000" // /* MW 6 */
+ 11627 "00100000" // /* MW 5 */
+ 11628 "00000000" // /* MW 4 */
+ 11629 "11110000" // /* MW 3 */
+ 11630 "00101100" // /* MW 2 */
+ 11631 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 749 31 first
+.tail_call
+ 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11633 "00000000" // /* MW 5 */
+ 11634 "00000000" // /* MW 4 */
+ 11635 "01000000" // /* MW 3 */
+ 11636 "00010100" // /* MW 2 */
+ 11637 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11639 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.src_ref 10 "softfloat.c" 777 38 first
+.tail_call
+.return_address
+ 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11649 "00000000" // /* MW 5 */
+ 11650 "00000000" // /* MW 4 */
+ 11651 "01000000" // /* MW 3 */
+ 11652 "00010100" // /* MW 2 */
+ 11653 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14addFloat32Sigsjji__end
+ 11663 "00000000" // /* MW 1 */
+.label _ZL14subFloat32Sigsjji
+.function subFloat32Sigs _ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 805 first
+.function_start
+ 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11665 "10010000" // /* MW 9 */
+ 11666 "11111111" // /* MW 8 */
+ 11667 "00001111" // /* MW 7 */
+ 11668 "11111110" // /* MW 6 */
+ 11669 "00011111" // /* MW 5 */
+ 11670 "00000000" // /* MW 4 */
+ 11671 "00000000" // /* MW 3 */
+ 11672 "00110001" // /* MW 2 */
+ 11673 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11675 "00011101" // /* MW 3 */
+ 11676 "10001001" // /* MW 2 */
+ 11677 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11679 "00011101" // /* MW 3 */
+ 11680 "01100101" // /* MW 2 */
+ 11681 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+ 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11683 "00000100" // /* MW 3 */
+ 11684 "01101001" // /* MW 2 */
+ 11685 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21 first
+ 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11687 "10010000" // /* MW 3 */
+ 11688 "00110010" // /* MW 2 */
+ 11689 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11691 "10010000" // /* MW 3 */
+ 11692 "10110110" // /* MW 2 */
+ 11693 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.src_ref 10 "softfloat.c" 816 9
+.src_ref 10 "softfloat.c" 817 9
+ 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11695 "00011101" // /* MW 5 */
+ 11696 "10100000" // /* MW 4 */
+ 11697 "10011001" // /* MW 3 */
+ 11698 "00100000" // /* MW 2 */
+ 11699 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 816 9 first
+ 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11701 "00111101" // /* MW 3 */
+ 11702 "00100011" // /* MW 2 */
+ 11703 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 815 19 first
+.src_ref 10 "softfloat.c" 818 11
+.src_ref 10 "softfloat.c" 819 17
+.src_ref 10 "softfloat.c" 843 31
+ 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00100000" // /* MW 4 */
+ 11707 "00111100" // /* MW 3 */
+ 11708 "10110010" // /* MW 2 */
+ 11709 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 11 first
+ 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00101010" // /* MW 3 */
+ 11712 "00001011" // /* MW 2 */
+ 11713 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 4
+ 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */
+ 11715 "00000001" // /* MW 5 */
+ 11716 "01000000" // /* MW 4 */
+ 11717 "01000000" // /* MW 3 */
+ 11718 "00010111" // /* MW 2 */
+ 11719 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 817 9 first
+.delay_slot
+ 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11721 "00111101" // /* MW 3 */
+ 11722 "00100001" // /* MW 2 */
+ 11723 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14
+.src_ref 10 "softfloat.c" 851 14
+.src_ref 10 "softfloat.c" 859 13
+.src_ref 10 "softfloat.c" 862 9
+.delay_slot
+ 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11725 "00010000" // /* MW 9 */
+ 11726 "00000000" // /* MW 8 */
+ 11727 "10001000" // /* MW 7 */
+ 11728 "00000000" // /* MW 6 */
+ 11729 "00000000" // /* MW 5 */
+ 11730 "00010000" // /* MW 4 */
+ 11731 "00000000" // /* MW 3 */
+ 11732 "11100000" // /* MW 2 */
+ 11733 "00011111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 851 14 first
+.delay_slot
+ 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000111" // /* MW 3 */
+ 11736 "11101000" // /* MW 2 */
+ 11737 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 862 9 first
+.delay_slot
+ 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11739 "01000101" // /* MW 3 */
+ 11740 "01100110" // /* MW 2 */
+ 11741 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 859 13 first
+.delay_slot
+ 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11743 "00000101" // /* MW 3 */
+ 11744 "00001001" // /* MW 2 */
+ 11745 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 17 first
+ 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11747 "10001001" // /* MW 3 */
+ 11748 "10001101" // /* MW 2 */
+ 11749 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 4
+ 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */
+ 11751 "00000001" // /* MW 5 */
+ 11752 "01000000" // /* MW 4 */
+ 11753 "10010000" // /* MW 3 */
+ 11754 "00010111" // /* MW 2 */
+ 11755 "00110000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.src_ref 10 "softfloat.c" 835 34
+.delay_slot
+ 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11757 "00000101" // /* MW 3 */
+ 11758 "00001010" // /* MW 2 */
+ 11759 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 34 first
+.delay_slot
+ 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11761 "01010110" // /* MW 3 */
+ 11762 "11001110" // /* MW 2 */
+ 11763 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14 first
+ 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "00000111" // /* MW 3 */
+ 11772 "01101000" // /* MW 2 */
+ 11773 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 4
+ 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */
+ 11775 "00000001" // /* MW 5 */
+ 11776 "01000000" // /* MW 4 */
+ 11777 "11001000" // /* MW 3 */
+ 11778 "00010111" // /* MW 2 */
+ 11779 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11781 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11791 "10100000" // /* MW 3 */
+ 11792 "01010011" // /* MW 2 */
+ 11793 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 838 8 first
+ 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11795 "00000111" // /* MW 3 */
+ 11796 "10100000" // /* MW 2 */
+ 11797 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+ 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11799 "00100010" // /* MW 3 */
+ 11800 "00100001" // /* MW 2 */
+ 11801 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 843 31 first
+ 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11803 "00000001" // /* MW 3 */
+ 11804 "00100001" // /* MW 2 */
+ 11805 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */
+ 11807 "00000001" // /* MW 5 */
+ 11808 "00000000" // /* MW 4 */
+ 11809 "00110000" // /* MW 3 */
+ 11810 "00010111" // /* MW 2 */
+ 11811 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+.delay_slot
+ 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00110010" // /* MW 3 */
+ 11814 "01100011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11817 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11819 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11821 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11823 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11825 "01011000" // /* MW 9 */
+ 11826 "00011111" // /* MW 8 */
+ 11827 "01001000" // /* MW 7 */
+ 11828 "00001110" // /* MW 6 */
+ 11829 "00111000" // /* MW 5 */
+ 11830 "00110000" // /* MW 4 */
+ 11831 "00000000" // /* MW 3 */
+ 11832 "00010100" // /* MW 2 */
+ 11833 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11835 "00100100" // /* MW 3 */
+ 11836 "11100101" // /* MW 2 */
+ 11837 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11839 "00101101" // /* MW 3 */
+ 11840 "01100101" // /* MW 2 */
+ 11841 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11843 "11110000" // /* MW 3 */
+ 11844 "01100110" // /* MW 2 */
+ 11845 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01001010" // /* MW 3 */
+ 11848 "00110111" // /* MW 2 */
+ 11849 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+ 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00111101" // /* MW 3 */
+ 11852 "01100010" // /* MW 2 */
+ 11853 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+ 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11110000" // /* MW 3 */
+ 11856 "10100100" // /* MW 2 */
+ 11857 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25
+ 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11859 "00100101" // /* MW 3 */
+ 11860 "01100001" // /* MW 2 */
+ 11861 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11863 "00000010" // /* MW 9 */
+ 11864 "11100011" // /* MW 8 */
+ 11865 "00000100" // /* MW 7 */
+ 11866 "00000000" // /* MW 6 */
+ 11867 "01011011" // /* MW 5 */
+ 11868 "00000001" // /* MW 4 */
+ 11869 "11110000" // /* MW 3 */
+ 11870 "00101100" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */
+ 11873 "00000000" // /* MW 5 */
+ 11874 "00000000" // /* MW 4 */
+ 11875 "10000000" // /* MW 3 */
+ 11876 "00010111" // /* MW 2 */
+ 11877 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16 first
+.delay_slot
+ 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11879 "00010001" // /* MW 3 */
+ 11880 "00000111" // /* MW 2 */
+ 11881 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11883 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "01111000" // /* MW 13 */
+ 11892 "10100101" // /* MW 12 */
+ 11893 "00000001" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_240
+.src_ref 10 "softfloat.c" 851 4 first
+ 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */
+ 11905 "00000001" // /* MW 5 */
+ 11906 "01000000" // /* MW 4 */
+ 11907 "11100000" // /* MW 3 */
+ 11908 "00010111" // /* MW 2 */
+ 11909 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11911 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11913 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11915 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11917 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "10100000" // /* MW 3 */
+ 11922 "00011101" // /* MW 2 */
+ 11923 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "10100000" // /* MW 3 */
+ 11926 "01010001" // /* MW 2 */
+ 11927 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4
+.src_ref 10 "softfloat.c" 855 14
+ 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "10100000" // /* MW 3 */
+ 11930 "11011100" // /* MW 2 */
+ 11931 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00100000" // /* MW 3 */
+ 11934 "01010000" // /* MW 2 */
+ 11935 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 856 8 first
+ 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11937 "11111111" // /* MW 3 */
+ 11938 "10100011" // /* MW 2 */
+ 11939 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+ 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11941 "00100010" // /* MW 3 */
+ 11942 "01100011" // /* MW 2 */
+ 11943 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */
+ 11945 "00000001" // /* MW 5 */
+ 11946 "00000000" // /* MW 4 */
+ 11947 "01111000" // /* MW 3 */
+ 11948 "00010111" // /* MW 2 */
+ 11949 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+.delay_slot
+ 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11951 "01000010" // /* MW 3 */
+ 11952 "00100000" // /* MW 2 */
+ 11953 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11957 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11959 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11961 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11963 "01011000" // /* MW 9 */
+ 11964 "00011111" // /* MW 8 */
+ 11965 "10001000" // /* MW 7 */
+ 11966 "10001110" // /* MW 6 */
+ 11967 "00101000" // /* MW 5 */
+ 11968 "00110001" // /* MW 4 */
+ 11969 "00000000" // /* MW 3 */
+ 11970 "00000011" // /* MW 2 */
+ 11971 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11973 "01000100" // /* MW 3 */
+ 11974 "10101001" // /* MW 2 */
+ 11975 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11977 "01001101" // /* MW 3 */
+ 11978 "00101001" // /* MW 2 */
+ 11979 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11981 "00101101" // /* MW 3 */
+ 11982 "00100101" // /* MW 2 */
+ 11983 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11985 "00111010" // /* MW 3 */
+ 11986 "01110110" // /* MW 2 */
+ 11987 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "11110000" // /* MW 3 */
+ 11990 "00101000" // /* MW 2 */
+ 11991 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "11110000" // /* MW 3 */
+ 11994 "00100000" // /* MW 2 */
+ 11995 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11997 "01000101" // /* MW 3 */
+ 11998 "10100011" // /* MW 2 */
+ 11999 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12001 "00000000" // /* MW 15 */
+ 12002 "00000000" // /* MW 14 */
+ 12003 "01111000" // /* MW 13 */
+ 12004 "10100101" // /* MW 12 */
+ 12005 "00000001" // /* MW 11 */
+ 12006 "10010000" // /* MW 10 */
+ 12007 "00001000" // /* MW 9 */
+ 12008 "00100001" // /* MW 8 */
+ 12009 "01011011" // /* MW 7 */
+ 12010 "00000001" // /* MW 6 */
+ 12011 "00100000" // /* MW 5 */
+ 12012 "00000000" // /* MW 4 */
+ 12013 "11110000" // /* MW 3 */
+ 12014 "00101100" // /* MW 2 */
+ 12015 "00000000" // /* MW 1 */
+.label __ll1__ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 864 16 first
+ 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12017 "00000000" // /* MW 15 */
+ 12018 "00000000" // /* MW 14 */
+ 12019 "01111000" // /* MW 13 */
+ 12020 "10100101" // /* MW 12 */
+ 12021 "00000001" // /* MW 11 */
+ 12022 "00001100" // /* MW 10 */
+ 12023 "00111000" // /* MW 9 */
+ 12024 "00100110" // /* MW 8 */
+ 12025 "01011011" // /* MW 7 */
+ 12026 "00000001" // /* MW 6 */
+ 12027 "00100000" // /* MW 5 */
+ 12028 "00000000" // /* MW 4 */
+ 12029 "11110000" // /* MW 3 */
+ 12030 "00101100" // /* MW 2 */
+ 12031 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.src_ref 10 "softfloat.c" 868 11 first
+.tail_call
+ 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 12033 "00000000" // /* MW 5 */
+ 12034 "00000000" // /* MW 4 */
+ 12035 "01000000" // /* MW 3 */
+ 12036 "00010101" // /* MW 2 */
+ 12037 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4 first
+.delay_slot
+ 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "11111111" // /* MW 3 */
+ 12040 "01000101" // /* MW 2 */
+ 12041 "00010110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12049 "00000000" // /* MW 15 */
+ 12050 "00000000" // /* MW 14 */
+ 12051 "01111000" // /* MW 13 */
+ 12052 "10100101" // /* MW 12 */
+ 12053 "00000001" // /* MW 11 */
+ 12054 "00000000" // /* MW 10 */
+ 12055 "00000000" // /* MW 9 */
+ 12056 "00000000" // /* MW 8 */
+ 12057 "01011011" // /* MW 7 */
+ 12058 "00000001" // /* MW 6 */
+ 12059 "00100000" // /* MW 5 */
+ 12060 "00000000" // /* MW 4 */
+ 12061 "11110000" // /* MW 3 */
+ 12062 "00101100" // /* MW 2 */
+ 12063 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 820 4 first
+.return_address
+ 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */
+ 12065 "00000001" // /* MW 5 */
+ 12066 "01000000" // /* MW 4 */
+ 12067 "11110000" // /* MW 3 */
+ 12068 "00010111" // /* MW 2 */
+ 12069 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12079 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 14 first
+ 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12081 "00011100" // /* MW 3 */
+ 12082 "00100111" // /* MW 2 */
+ 12083 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 4
+ 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12085 "00000001" // /* MW 5 */
+ 12086 "01000000" // /* MW 4 */
+ 12087 "00001000" // /* MW 3 */
+ 12088 "00011000" // /* MW 2 */
+ 12089 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4 first
+.delay_slot
+ 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12091 "10010010" // /* MW 3 */
+ 12092 "01110001" // /* MW 2 */
+ 12093 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12095 "10100000" // /* MW 3 */
+ 12096 "10011101" // /* MW 2 */
+ 12097 "00011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12099 "00100010" // /* MW 3 */
+ 12100 "01110011" // /* MW 2 */
+ 12101 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12103 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12105 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 14 first
+ 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00001100" // /* MW 3 */
+ 12108 "01100101" // /* MW 2 */
+ 12109 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 4
+ 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */
+ 12111 "00000001" // /* MW 5 */
+ 12112 "01000000" // /* MW 4 */
+ 12113 "00011000" // /* MW 3 */
+ 12114 "00011000" // /* MW 2 */
+ 12115 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12125 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12127 "01111101" // /* MW 3 */
+ 12128 "00100000" // /* MW 2 */
+ 12129 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24
+ 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12131 "11101000" // /* MW 5 */
+ 12132 "11001001" // /* MW 4 */
+ 12133 "11000000" // /* MW 3 */
+ 12134 "00000111" // /* MW 2 */
+ 12135 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24 first
+ 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "01010110" // /* MW 3 */
+ 12138 "00000110" // /* MW 2 */
+ 12139 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12143 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 4
+ 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12145 "00000000" // /* MW 3 */
+ 12146 "00101000" // /* MW 2 */
+ 12147 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12149 "00001101" // /* MW 3 */
+ 12150 "00100010" // /* MW 2 */
+ 12151 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12155 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12157 "00100111" // /* MW 3 */
+ 12158 "01100011" // /* MW 2 */
+ 12159 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12161 "00000000" // /* MW 15 */
+ 12162 "00000000" // /* MW 14 */
+ 12163 "01111000" // /* MW 13 */
+ 12164 "10100101" // /* MW 12 */
+ 12165 "00000001" // /* MW 11 */
+ 12166 "01101100" // /* MW 10 */
+ 12167 "00001000" // /* MW 9 */
+ 12168 "00100010" // /* MW 8 */
+ 12169 "01011011" // /* MW 7 */
+ 12170 "00000001" // /* MW 6 */
+ 12171 "00100000" // /* MW 5 */
+ 12172 "00000000" // /* MW 4 */
+ 12173 "11110000" // /* MW 3 */
+ 12174 "00101100" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 834 8 first
+ 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */
+ 12177 "00000001" // /* MW 5 */
+ 12178 "01000000" // /* MW 4 */
+ 12179 "00101000" // /* MW 3 */
+ 12180 "00011000" // /* MW 2 */
+ 12181 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12193 "01111101" // /* MW 3 */
+ 12194 "00100000" // /* MW 2 */
+ 12195 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 8 first
+ 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12197 "00000000" // /* MW 3 */
+ 12198 "00101000" // /* MW 2 */
+ 12199 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12201 "00001101" // /* MW 3 */
+ 12202 "11100001" // /* MW 2 */
+ 12203 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12205 "00000000" // /* MW 5 */
+ 12206 "10100000" // /* MW 4 */
+ 12207 "00001000" // /* MW 3 */
+ 12208 "10000000" // /* MW 2 */
+ 12209 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12211 "00000000" // /* MW 3 */
+ 12212 "01000001" // /* MW 2 */
+ 12213 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 12217 "00011100" // /* MW 7 */
+ 12218 "00000000" // /* MW 6 */
+ 12219 "00000000" // /* MW 5 */
+ 12220 "00000100" // /* MW 4 */
+ 12221 "11110000" // /* MW 3 */
+ 12222 "00101100" // /* MW 2 */
+ 12223 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 852 8 first
+ 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */
+ 12225 "00000001" // /* MW 5 */
+ 12226 "01000000" // /* MW 4 */
+ 12227 "00110000" // /* MW 3 */
+ 12228 "00011000" // /* MW 2 */
+ 12229 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12233 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12235 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12237 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 853 8 first
+ 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12241 "00000000" // /* MW 3 */
+ 12242 "00101000" // /* MW 2 */
+ 12243 "00010000" // /* MW 1 */
+.delay_slot
+ 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "10100000" // /* MW 3 */
+ 12246 "00010000" // /* MW 2 */
+ 12247 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 821 18 first
+ 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00000101" // /* MW 3 */
+ 12258 "01100001" // /* MW 2 */
+ 12259 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 821 8
+ 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */
+ 12261 "00000001" // /* MW 5 */
+ 12262 "01000000" // /* MW 4 */
+ 12263 "00111000" // /* MW 3 */
+ 12264 "00011000" // /* MW 2 */
+ 12265 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12267 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12269 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12271 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12275 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 823 8 first
+ 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12277 "00000000" // /* MW 3 */
+ 12278 "00101000" // /* MW 2 */
+ 12279 "00010000" // /* MW 1 */
+.delay_slot
+ 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12281 "11111110" // /* MW 5 */
+ 12282 "00111111" // /* MW 4 */
+ 12283 "11110000" // /* MW 3 */
+ 12284 "11111111" // /* MW 2 */
+ 12285 "01111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "10000001" // /* MW 11 */
+ 12294 "10101101" // /* MW 10 */
+ 12295 "00000000" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */
+ 12305 "00000000" // /* MW 5 */
+ 12306 "00000000" // /* MW 4 */
+ 12307 "01111000" // /* MW 3 */
+ 12308 "00010111" // /* MW 2 */
+ 12309 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12311 "10100000" // /* MW 3 */
+ 12312 "01010001" // /* MW 2 */
+ 12313 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 864 16
+.delay_slot
+ 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12315 "10100000" // /* MW 3 */
+ 12316 "11011000" // /* MW 2 */
+ 12317 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12319 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12321 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12323 "00011100" // /* MW 13 */
+ 12324 "00000000" // /* MW 12 */
+ 12325 "00000000" // /* MW 11 */
+ 12326 "01010111" // /* MW 10 */
+ 12327 "00011010" // /* MW 9 */
+ 12328 "01000000" // /* MW 8 */
+ 12329 "00000000" // /* MW 7 */
+ 12330 "00000000" // /* MW 6 */
+ 12331 "10110110" // /* MW 5 */
+ 12332 "00000010" // /* MW 4 */
+ 12333 "11110000" // /* MW 3 */
+ 12334 "00101100" // /* MW 2 */
+ 12335 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */
+ 12337 "00000000" // /* MW 5 */
+ 12338 "00000000" // /* MW 4 */
+ 12339 "00110000" // /* MW 3 */
+ 12340 "00010111" // /* MW 2 */
+ 12341 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16
+.delay_slot
+ 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12343 "00100000" // /* MW 3 */
+ 12344 "00011000" // /* MW 2 */
+ 12345 "00011001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+.delay_slot
+ 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "01011100" // /* MW 2 */
+ 12349 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12351 "10100000" // /* MW 3 */
+ 12352 "01010011" // /* MW 2 */
+ 12353 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12357 "10000001" // /* MW 11 */
+ 12358 "10101101" // /* MW 10 */
+ 12359 "00000000" // /* MW 9 */
+ 12360 "00000000" // /* MW 8 */
+ 12361 "00000000" // /* MW 7 */
+ 12362 "00000000" // /* MW 6 */
+ 12363 "00100000" // /* MW 5 */
+ 12364 "00000000" // /* MW 4 */
+ 12365 "11110000" // /* MW 3 */
+ 12366 "00101100" // /* MW 2 */
+ 12367 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.src_ref 10 "softfloat.c" 834 27 first
+.tail_call
+ 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12369 "00000000" // /* MW 5 */
+ 12370 "00000000" // /* MW 4 */
+ 12371 "01000000" // /* MW 3 */
+ 12372 "00010100" // /* MW 2 */
+ 12373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12375 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12377 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12383 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.src_ref 10 "softfloat.c" 852 27 first
+.tail_call
+.return_address
+ 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12385 "00000000" // /* MW 5 */
+ 12386 "00000000" // /* MW 4 */
+ 12387 "01000000" // /* MW 3 */
+ 12388 "00010100" // /* MW 2 */
+ 12389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12399 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.src_ref 10 "softfloat.c" 821 34 first
+.tail_call
+.return_address
+ 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12401 "00000000" // /* MW 5 */
+ 12402 "00000000" // /* MW 4 */
+ 12403 "01000000" // /* MW 3 */
+ 12404 "00010100" // /* MW 2 */
+ 12405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14subFloat32Sigsjji__end
+ 12415 "00000000" // /* MW 1 */
+.label float32_add
+.function float32_add float32_add
+.src_ref 10 "softfloat.c" 92 12
+.src_ref 10 "softfloat.c" 878 first
+.function_start
+ 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "10000101" // /* MW 3 */
+ 12418 "11100000" // /* MW 2 */
+ 12419 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12 first
+ 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12421 "00001101" // /* MW 3 */
+ 12422 "01000111" // /* MW 2 */
+ 12423 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12
+ 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00001101" // /* MW 3 */
+ 12426 "10100001" // /* MW 2 */
+ 12427 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 15 first
+ 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00000111" // /* MW 3 */
+ 12430 "11100001" // /* MW 2 */
+ 12431 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 4
+ 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */
+ 12433 "00000001" // /* MW 5 */
+ 12434 "01000000" // /* MW 4 */
+ 12435 "01011000" // /* MW 3 */
+ 12436 "00011000" // /* MW 2 */
+ 12437 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 888 15 first
+.tail_call
+ 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */
+ 12449 "00000000" // /* MW 5 */
+ 12450 "00000000" // /* MW 4 */
+ 12451 "11001000" // /* MW 3 */
+ 12452 "00010110" // /* MW 2 */
+ 12453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12455 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12461 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12463 "00000000" // /* MW 1 */
+.label TGT_Ffloat32_add_48
+.src_ref 10 "softfloat.c" 885 15 first
+.tail_call
+.return_address
+ 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 12465 "00000000" // /* MW 5 */
+ 12466 "00000000" // /* MW 4 */
+ 12467 "10010000" // /* MW 3 */
+ 12468 "00010101" // /* MW 2 */
+ 12469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label float32_add__end
+ 12479 "00000000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmico
new file mode 100644
index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.cmico
@@ -0,0 +1 @@
++Mdec
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.lst
new file mode 100644
index 0000000000000000000000000000000000000000..da538ba51f010cb935d6faf7c98cc539440d5b5d
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.lst
@@ -0,0 +1,4815 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+
+.text_segment PM 2352
+.entry_point
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function_start
+ 2352 0x00 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p0]; MOV r0, r15
+ 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 2364 0xff 0x73 0xb0 0x01 0xe8 0x50 0x70 0x02 ST p7, [sp, #-8]; MOV r15, r1
+ 2372 0xff 0x82 0xb0 0x1f 0xa7 0x83 0xb0 0x60 0x79 0x3a ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0
+ 2382 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+ 2386 0x00 0x00 NOPX
+ 2388 0x00 0x00 NOPX
+ 2390 0x18 0x68 0x02 0x18 ADD.NC p0, r16, #4
+ 2394 0x00 0x1e 0x16 0x98 LDA r16, [p0], #4
+ 2398 0x00 0x3e 0x56 0x98 LDA r18, [p0], #12
+ 2402 0x00 0xee 0x36 0x98 LDA r17, [p0], #-8
+ 2406 0x00 0x07 0x76 0x98 LDA r27, [p0]
+ 2410 0x00 0x00 NOPX
+ 2412 0x00 0x00 NOPX
+ 2414 0x00 0x00 NOPX
+ 2416 0x00 0x00 NOPX
+ 2418 0x00 0x00 NOPX
+ 2420 0x00 0x00 NOPX
+ 2422 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27
+ 2426 0x08 0xd6 0x11 0x98 ST r16, [p0, #-12]
+ 2430 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 2434 0x00 0x00 NOPX
+ 2436 0x00 0x00 NOPX
+ 2438 0x00 0x00 NOPX
+ 2440 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26
+ 2444 0x00 0x00 NOPX
+ 2446 0x00 0x00 NOPX
+ 2448 0x00 0x00 NOPX
+ 2450 0x07 0x2c 0x1e 0x98 LDA p0, [p7], #8
+ 2454 0x07 0xfc 0x9e 0x98 LDA p1, [p7], #-4
+ 2458 0x07 0x05 0x1e 0x98 LDA p2, [p7]
+.no_stack_arguments
+ 2462 0x00 0x0e 0xb8 0x00 0x01 0x04 JL #7536
+.delay_slot
+ 2468 0x0f 0xf3 0x55 0x98 ST r26, [sp, #-16]
+.delay_slot
+.swstall delay_slot
+ 2472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2474 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2476 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2478 0x00 0x00 NOPX
+.return_address
+ 2480 0x07 0xf6 0x16 0x98 LDA r16, [p7, #-4]
+ 2484 0x07 0xf3 0x51 0x18 LDA r26, [sp, #-16]
+ 2488 0x00 0x00 NOPX
+ 2490 0x00 0x00 NOPX
+ 2492 0x00 0x00 NOPX
+ 2494 0x00 0x00 NOPX
+ 2496 0x00 0x00 NOPX
+ 2498 0x18 0x68 0x08 0x18 ADD.NC p0, r16, #16
+ 2502 0x00 0x06 0x16 0x98 LDA r16, [p0]
+ 2506 0x10 0x22 0x05 0x18 MOVX r17, #1
+ 2510 0x00 0x00 NOPX
+ 2512 0x00 0x00 NOPX
+ 2514 0x00 0x00 NOPX
+ 2516 0x00 0x00 NOPX
+ 2518 0x00 0x00 NOPX
+ 2520 0x14 0x15 0x18 0x18 REL.COND r16, r17, r26
+ 2524 0xfe 0x87 0x2d 0xaf 0x41 0xd4 LDA lr, [sp, #-12]; MOV r27, r15
+ 2530 0x00 0xf6 0x16 0x98 LDA r16, [p0, #-4]
+ 2534 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+ 2538 0x00 0x00 NOPX
+ 2540 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+ 2544 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 2550 0x00 0x00 NOPX
+ 2552 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 2556 0x14 0x63 0x01 0x98 SUB r17, r17, r16
+.delay_slot
+ 2560 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27
+.delay_slot
+ 2564 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4]
+.delay_slot
+.swstall delay_slot
+ 2568 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2570 0x00 0x00 NOPX
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+
+.text_segment PM 2576
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function_start
+ 2576 0x23 0x8e 0xd3 0x80 0x8b 0x3e 0x67 0x68 0x09 0x60 0x78 0x76 LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1
+ 2588 0x02 0x07 0x00 0x3e 0x25 0x09 0x30 0x07 0x08 0xba MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28
+ 2598 0x00 0x7e 0x00 0x3e 0x17 0xa8 0x08 0x60 0x78 0xba MOVA r30, #3; MOVX r1, #-3; MOV r0, p0
+ 2608 0xff 0xe5 0x00 0x00 0x00 0x3c 0x8f 0xfc 0x10 0xba MOVA r5, #-1; MOVXM r4, #65528
+ 2618 0xff 0x90 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r16, #-4; PADDXM [sp], #64
+ 2628 0x1c 0x60 0x17 0x18 ADD.NC p4, r0, #46
+ 2632 0x00 0x00 NOPX
+ 2634 0x08 0x1c 0x71 0x98 ST r3, [p0], #4
+ 2638 0x01 0x1f 0x56 0x98 LDA r26, [p1], #4
+ 2642 0x00 0x00 NOPX
+ 2644 0x00 0x00 NOPX
+ 2646 0x00 0x00 NOPX
+ 2648 0x00 0x00 NOPX
+ 2650 0x00 0x00 NOPX
+ 2652 0x00 0x00 NOPX
+ 2654 0x03 0xea 0x3d 0x44 0x89 0x5c ST r26, [p0], #4; AND r17, r26, r4
+ 2660 0x23 0xf6 0xd0 0x06 0x4d 0x7e 0xcc 0x48 0xa8 0xba LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4
+ 2670 0x16 0xa4 0x6d 0x98 LSHL r18, r26, r6
+ 2674 0x11 0x0c 0x1d 0x98 LSHL r6, r4, r1
+ 2678 0xd4 0x43 0xb0 0xb2 0xff 0x24 LSHL r17, r26, r1; ADD.NC r1, r18, #-1
+ 2684 0x00 0x00 NOPX
+ 2686 0x00 0x00 NOPX
+ 2688 0x00 0x00 NOPX
+ 2690 0x03 0xf6 0x3e 0x9c 0x4c 0x5c ST r29, [p0], #4; MAC r7, r7, r29, r2
+ 2696 0x23 0x8a 0xd7 0xff 0xb5 0x80 0x07 0x49 0xaf 0xfa LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26
+ 2706 0x10 0xe9 0xdf 0x98 MUL r20, r3, r29
+ 2710 0x10 0xf8 0x4f 0x98 MUL r28, r3, r4
+ 2714 0x17 0x6b 0xed 0x98 LSHL r21, r29, r30
+ 2718 0xec 0x8b 0xbd 0xb5 0xd0 0x24 LSHL r18, r29, r5; ADD.NC r27, r21, #-48
+ 2724 0x14 0xaf 0xff 0x18 ADD r23, r18, #-1
+ 2728 0x17 0x7b 0x6f 0x98 MUL r29, r29, r22
+ 2732 0x03 0x8a 0x3f 0x60 0x55 0x5c ST r2, [p0], #4; LT r24, r30, r2
+ 2738 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 2742 0x00 0x00 NOPX
+ 2744 0x00 0x00 NOPX
+ 2746 0x00 0x00 NOPX
+ 2748 0x00 0x00 NOPX
+ 2750 0x00 0x00 NOPX
+ 2752 0x00 0x00 NOPX
+ 2754 0x03 0x85 0x30 0x03 0xf0 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r31, el0
+ 2762 0x01 0x04 0x0e 0x98 LDA eh0, [p1]
+ 2766 0x00 0x00 NOPX
+ 2768 0x00 0x00 NOPX
+ 2770 0x00 0x00 NOPX
+ 2772 0x00 0x00 NOPX
+ 2774 0x00 0x00 NOPX
+ 2776 0x00 0x00 NOPX
+ 2778 0x00 0x81 0x30 0x03 0x30 0x8e 0x70 0x02 ST eh0, [p0]; MOV r25, eh0
+ 2786 0x01 0x17 0xd6 0x98 LDA r30, [p1, #4]
+ 2790 0x00 0x00 NOPX
+ 2792 0xc0 0x05 0xb0 0x40 0x01 0x84 JNZ r24, #2912
+.delay_slot
+ 2798 0x17 0x27 0x0d 0x98 LSHL r19, r28, r16
+.delay_slot
+ 2802 0x17 0xf3 0x9f 0x98 MUL r25, r31, r25
+.delay_slot
+ 2806 0xa5 0x0b 0xb2 0xb1 0xff 0x24 LSHL r20, r20, r5; ADD.NC r5, r17, #-1
+.delay_slot
+ 2812 0x11 0x21 0x0d 0x98 LSHL r16, r4, r16
+.delay_slot
+ 2816 0x02 0xfa 0x3c 0xff 0xdf 0x5c ST r30, [p0, #4]; MUL r31, r25, r30
+ 2822 0x10 0x38 0x05 0x18 MOVX r28, #1
+ 2826 0x10 0xb9 0xc7 0x98 EQ r28, r2, r28
+ 2830 0xe0 0x07 0xe0 0x40 0x01 0x84 JNZ r28, #4032
+.delay_slot
+.swstall delay_slot
+ 2836 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2838 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2840 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2842 0x00 0x00 NOPX
+.delay_slot
+ 2844 0x10 0xed 0xff 0x18 ADD r22, r3, #-1
+ 2848 0x10 0x22 0x09 0x18 MOVX r17, #2
+ 2852 0x14 0x62 0x27 0x98 EQ r17, r17, r2
+ 2856 0x88 0x07 0xa0 0x40 0x01 0x84 JNZ r17, #3904
+.delay_slot
+.swstall delay_slot
+ 2862 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2864 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2866 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2868 0x00 0x00 NOPX
+.delay_slot
+ 2870 0x10 0x0e 0x0d 0x18 MOVX r7, #3
+ 2874 0x11 0xc4 0x27 0x98 EQ r2, r7, r2
+ 2878 0x10 0x07 0x50 0x40 0x01 0x84 JNZ r2, #3744
+.delay_slot
+.swstall delay_slot
+ 2884 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2886 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2888 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2890 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2892 0x00 0x00 NOPX
+ 2894 0x00 0x06 0xf0 0x00 0x00 0x84 J #3552
+.delay_slot
+ 2900 0x10 0x34 0x11 0x18 MOVX r26, #4
+.delay_slot
+.swstall delay_slot
+ 2904 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2906 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2908 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2910 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+ 2912 0x10 0x3a 0x15 0x18 MOVX r29, #5
+ 2916 0x17 0x70 0x2a 0x98 LT r24, r29, r2
+ 2920 0xc0 0x06 0x50 0x40 0x01 0x84 JNZ r24, #3232
+.delay_slot
+.swstall delay_slot
+ 2926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2932 0x00 0x00 NOPX
+.delay_slot
+ 2934 0x10 0x34 0x11 0x18 MOVX r26, #4
+ 2938 0x16 0xa2 0x27 0x98 EQ r17, r26, r2
+ 2942 0x88 0x06 0x10 0x40 0x01 0x84 JNZ r17, #3104
+.delay_slot
+.swstall delay_slot
+ 2948 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2950 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2952 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2954 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2956 0x00 0x00 NOPX
+ 2958 0x17 0x44 0x28 0x98 NE r2, r29, r2
+ 2962 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552
+.delay_slot
+.swstall delay_slot
+ 2968 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2970 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2972 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2974 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2976 0x00 0x00 NOPX
+ 2978 0x83 0xd6 0xe0 0x00 0x22 0x08 0x07 0xec 0x58 0xba ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20
+ 2988 0x1f 0x9c 0xa0 0xf8 MOV r30, r25
+ 2992 0x00 0x00 NOPX
+ 2994 0x00 0x00 NOPX
+ 2996 0x00 0x00 NOPX
+ 2998 0x00 0x00 NOPX
+ 3000 0x00 0x00 NOPX
+ 3002 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2
+ 3006 0x00 0x00 NOPX
+ 3008 0x00 0x00 NOPX
+ 3010 0x00 0x00 NOPX
+ 3012 0x00 0x00 NOPX
+ 3014 0x00 0x00 NOPX
+ 3016 0x00 0x00 NOPX
+ 3018 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 3022 0x00 0x00 NOPX
+ 3024 0x00 0x00 NOPX
+ 3026 0x00 0x00 NOPX
+ 3028 0x00 0x00 NOPX
+ 3030 0x00 0x00 NOPX
+ 3032 0x00 0x00 NOPX
+ 3034 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3038 0x00 0x00 NOPX
+ 3040 0x00 0x00 NOPX
+ 3042 0x00 0x00 NOPX
+ 3044 0x00 0x00 NOPX
+ 3046 0x00 0x00 NOPX
+ 3048 0x00 0x00 NOPX
+ 3050 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3054 0x00 0x00 NOPX
+ 3056 0x00 0x00 NOPX
+ 3058 0x00 0x00 NOPX
+ 3060 0x00 0x00 NOPX
+ 3062 0x00 0x00 NOPX
+ 3064 0x00 0x00 NOPX
+ 3066 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0
+ 3070 0x00 0x00 NOPX
+ 3072 0x00 0x00 NOPX
+ 3074 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.swstall delay_slot
+ 3080 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3082 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3084 0x00 0x00 NOPX
+.delay_slot
+ 3086 0x0c 0x06 0x51 0x98 ST r18, [p4]
+.delay_slot
+ 3090 0x00 0x2c 0xf8 0x29 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r6, [p4, #4]; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+ 3104 0x83 0x92 0xe0 0x3e 0x67 0xa8 0x48 0x10 0x58 0xba ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16
+ 3114 0xfd 0x80 0x80 0x0c 0x22 0x33 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0
+ 3124 0x00 0x00 NOPX
+ 3126 0x00 0x00 NOPX
+ 3128 0x00 0x00 NOPX
+ 3130 0x00 0x00 NOPX
+ 3132 0x00 0x00 NOPX
+ 3134 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3138 0x00 0x00 NOPX
+ 3140 0x00 0x00 NOPX
+ 3142 0x00 0x00 NOPX
+ 3144 0x00 0x00 NOPX
+ 3146 0x00 0x00 NOPX
+ 3148 0x00 0x00 NOPX
+ 3150 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 3154 0x00 0x00 NOPX
+ 3156 0x00 0x00 NOPX
+ 3158 0x00 0x00 NOPX
+ 3160 0x00 0x00 NOPX
+ 3162 0x00 0x00 NOPX
+ 3164 0x00 0x00 NOPX
+ 3166 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3170 0x00 0x00 NOPX
+ 3172 0x00 0x00 NOPX
+ 3174 0x00 0x00 NOPX
+ 3176 0x00 0x00 NOPX
+ 3178 0x00 0x00 NOPX
+ 3180 0x00 0x00 NOPX
+ 3182 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3186 0x00 0x00 NOPX
+ 3188 0x00 0x00 NOPX
+ 3190 0x00 0x00 NOPX
+ 3192 0x00 0x00 NOPX
+ 3194 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 0x10 0x02 0x41 0x18 MOVX r1, #16
+.delay_slot
+.swstall delay_slot
+ 3216 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3218 0x00 0x00 NOPX
+.delay_slot
+ 3220 0x0c 0x14 0x71 0x98 ST r3, [p4, #4]
+.delay_slot
+ 3224 0x80 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p4]; NOPM
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+ 3232 0xff 0x8e 0x20 0x10 0x32 0x2c LDA r3, [sp, #-4]; MOVX r4, #6
+ 3238 0x10 0x88 0x47 0x98 EQ r4, r2, r4
+ 3242 0x20 0x06 0xa8 0x40 0x01 0x84 JNZ r4, #3408
+.delay_slot
+ 3248 0x10 0x02 0x41 0x18 MOVX r1, #16
+.delay_slot
+.swstall delay_slot
+ 3252 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3254 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3256 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3258 0x00 0x00 NOPX
+ 3260 0x10 0x06 0x1d 0x18 MOVX r3, #7
+ 3264 0x10 0xc4 0x28 0x98 NE r2, r3, r2
+ 3268 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552
+.delay_slot
+.swstall delay_slot
+ 3274 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3276 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3282 0x00 0x00 NOPX
+ 3284 0x83 0x86 0xe0 0x26 0x2f 0xf8 0x07 0xec 0x58 0xba ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20
+ 3294 0xff 0x43 0x00 0x00 0x00 0x40 0x40 0x00 0x10 0xba MOVA r3, #-6; MOVXM dj0, #65536
+ 3304 0xe0 0xc7 0xbc 0x20 0x01 0x64 LSHL r3, r28, r3; MOV r24, #0
+ 3310 0x00 0x00 NOPX
+ 3312 0x00 0x00 NOPX
+ 3314 0x00 0x00 NOPX
+ 3316 0x00 0x00 NOPX
+ 3318 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3322 0x00 0x00 NOPX
+ 3324 0x00 0x00 NOPX
+ 3326 0x00 0x00 NOPX
+ 3328 0x00 0x00 NOPX
+ 3330 0x00 0x00 NOPX
+ 3332 0x00 0x00 NOPX
+ 3334 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3338 0x00 0x00 NOPX
+ 3340 0x00 0x00 NOPX
+ 3342 0x00 0x00 NOPX
+ 3344 0x00 0x00 NOPX
+ 3346 0x00 0x00 NOPX
+ 3348 0x00 0x00 NOPX
+ 3350 0x0c 0x1c 0x41 0x98 ST dj0, [p4], #4
+ 3354 0x04 0x0b 0x17 0x18 ST.s16 r24, [p4], m0
+ 3358 0x00 0x00 NOPX
+ 3360 0x00 0x00 NOPX
+ 3362 0x00 0x00 NOPX
+ 3364 0x00 0x00 NOPX
+ 3366 0x00 0x00 NOPX
+ 3368 0x00 0x00 NOPX
+ 3370 0x0c 0x07 0x51 0x98 ST r26, [p4]
+ 3374 0x0c 0x14 0x71 0x98 ST r3, [p4, #4]
+ 3378 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+ 3384 0x1f 0x9f 0xa0 0xf8 MOV r30, r31
+.delay_slot
+.swstall delay_slot
+ 3388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3390 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3394 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+ 3408 0x83 0x86 0xe0 0x06 0x2b 0x70 0x48 0x10 0x58 0xba ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16
+ 3418 0xfd 0x80 0x80 0x3e 0x47 0xa8 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0
+ 3428 0x10 0xc8 0x4d 0x98 LSHL r4, r3, r4
+ 3432 0xf7 0x8d 0xf1 0xa4 0xff 0x24 MUL r30, r30, r6; ADD.NC r3, r4, #-1
+ 3438 0x00 0x00 NOPX
+ 3440 0x00 0x00 NOPX
+ 3442 0x00 0x00 NOPX
+ 3444 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3448 0x00 0x00 NOPX
+ 3450 0x00 0x00 NOPX
+ 3452 0x00 0x00 NOPX
+ 3454 0x00 0x00 NOPX
+ 3456 0x00 0x00 NOPX
+ 3458 0x00 0x00 NOPX
+ 3460 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3464 0x00 0x00 NOPX
+ 3466 0x00 0x00 NOPX
+ 3468 0x00 0x00 NOPX
+ 3470 0x00 0x00 NOPX
+ 3472 0x00 0x00 NOPX
+ 3474 0x00 0x00 NOPX
+ 3476 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+ 3480 0x00 0x00 NOPX
+ 3482 0x00 0x00 NOPX
+ 3484 0x00 0x00 NOPX
+ 3486 0x00 0x00 NOPX
+ 3488 0x00 0x00 NOPX
+ 3490 0x00 0x00 NOPX
+ 3492 0x04 0x1c 0x77 0x18 ST.s16 r3, [p4], #2
+ 3496 0x00 0x00 NOPX
+ 3498 0x00 0x00 NOPX
+ 3500 0x00 0x00 NOPX
+ 3502 0x00 0x00 NOPX
+ 3504 0x00 0x00 NOPX
+ 3506 0x00 0x00 NOPX
+ 3508 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0
+ 3512 0x00 0x00 NOPX
+ 3514 0x00 0x00 NOPX
+ 3516 0x00 0x00 NOPX
+ 3518 0x00 0x00 NOPX
+ 3520 0x00 0x00 NOPX
+ 3522 0x00 0x00 NOPX
+ 3524 0x0c 0x06 0x31 0x98 ST r17, [p4]
+ 3528 0x82 0xd2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r20, [p4, #4]; NOPM
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 0x18 0x80 0x40 0xb8 MOV dj0, #32
+ 3540 0x60 0x7a 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3552 0x03 0x08 0x80 0xc0 0x1e 0x14 MOVA m2, #24; ADD.NC p0, r0, #30
+ 3558 0x43 0x8a 0xd0 0x00 0x02 0x08 0x07 0xe2 0x58 0xba LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30
+ 3568 0x40 0x8e 0x52 0x00 0x99 0x54 LDA.s16 r3, [p2]; MOV m1, #38
+ 3574 0x02 0x14 0x36 0x98 LDA r1, [p2, #4]
+ 3578 0x00 0x00 NOPX
+ 3580 0x00 0x00 NOPX
+ 3582 0x00 0x2f 0xf7 0x18 ST.s16 r31, [p0], #4
+ 3586 0x00 0x00 NOPX
+ 3588 0x00 0x00 NOPX
+ 3590 0x00 0x00 NOPX
+ 3592 0x00 0x00 NOPX
+ 3594 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 0x00 0x5f 0x17 0x18 ST.s16 r24, [p0], #10
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 0x10 0x30 0x01 0x18 MOVX r24, #0
+ 3614 0x00 0x00 NOPX
+ 3616 0x00 0xcf 0x17 0x18 ST.s16 r24, [p0], #-8
+ 3620 0x00 0x48 0x9a 0x98 LDA.u16 r4, [p0], m2
+ 3624 0x00 0x00 NOPX
+ 3626 0x00 0x00 NOPX
+ 3628 0x00 0x00 NOPX
+ 3630 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 0x00 0xfc 0x17 0x18 ST.s16 r0, [p0], #-2
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 0x21 0x35 0xb2 0xa4 0xff 0x24 LSHL r4, r4, r26; ADD.NC r5, r4, #-1
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 0x10 0x00 0x41 0x98 SUB r0, r0, r4
+ 3648 0x00 0x00 NOPX
+ 3650 0x00 0x00 NOPX
+ 3652 0x00 0x00 NOPX
+ 3654 0x00 0x00 NOPX
+ 3656 0x00 0x08 0xb7 0x18 ST.s16 r5, [p0], m0
+ 3660 0x00 0x00 NOPX
+ 3662 0x00 0x00 NOPX
+ 3664 0x00 0x00 NOPX
+ 3666 0x00 0x00 NOPX
+ 3668 0x00 0x00 NOPX
+ 3670 0x00 0x00 NOPX
+ 3672 0x00 0x2a 0x77 0x18 ST.s16 r19, [p0], m1
+ 3676 0x00 0x00 NOPX
+ 3678 0x00 0x00 NOPX
+ 3680 0x00 0x00 NOPX
+ 3682 0x00 0x00 NOPX
+ 3684 0x00 0x00 NOPX
+ 3686 0x00 0x00 NOPX
+ 3688 0x00 0xec 0x47 0x18 ST.s8 r2, [p0], #-2
+ 3692 0x00 0x00 NOPX
+ 3694 0x00 0x00 NOPX
+ 3696 0x00 0x00 NOPX
+ 3698 0x00 0x00 NOPX
+ 3700 0x00 0x00 NOPX
+ 3702 0x00 0x00 NOPX
+ 3704 0x00 0x04 0x77 0x18 ST.s16 r3, [p0]
+ 3708 0x00 0x00 NOPX
+ 3710 0x00 0x00 NOPX
+ 3712 0x00 0x00 NOPX
+ 3714 0x00 0x00 NOPX
+ 3716 0x00 0x00 NOPX
+ 3718 0x00 0x00 NOPX
+ 3720 0x00 0xe4 0x27 0x18 ST.s8 r1, [p0, #-2]
+ 3724 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 3728 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 3734 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3736 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3738 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3740 0x00 0x01 0x67 0x98 NOPA
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+ 3744 0x83 0xd6 0xe0 0x00 0x00 0x3c 0xaf 0xf4 0x10 0xba ST.s16 r21, [p4], #2; MOVXM r5, #65512
+ 3754 0xff 0x8a 0x20 0x0a 0x7d 0x04 0x07 0xec 0x58 0xba LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20
+ 3764 0x00 0x9a 0x00 0x00 0x00 0x3c 0xcf 0xff 0x90 0xba MOVA r26, #4; MOVXM r6, #65535
+ 3774 0x10 0xe2 0x60 0x98 ADD r17, r3, r6
+ 3778 0x14 0x7a 0x46 0x18 MAC r29, r29, r17, r4
+ 3782 0x14 0x6a 0x4e 0x18 MSC r21, r21, r17, r4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 0x11 0xc4 0x2f 0x98 MUL r2, r7, r2
+ 3796 0x00 0x00 NOPX
+ 3798 0x00 0x00 NOPX
+ 3800 0x00 0x00 NOPX
+ 3802 0x00 0x00 NOPX
+ 3804 0x00 0x00 NOPX
+ 3806 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 3810 0x00 0x00 NOPX
+ 3812 0x00 0x00 NOPX
+ 3814 0x00 0x00 NOPX
+ 3816 0x00 0x00 NOPX
+ 3818 0x00 0x00 NOPX
+ 3820 0x00 0x00 NOPX
+ 3822 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2
+ 3826 0x00 0x00 NOPX
+ 3828 0x00 0x00 NOPX
+ 3830 0x00 0x00 NOPX
+ 3832 0x00 0x00 NOPX
+ 3834 0x00 0x00 NOPX
+ 3836 0x00 0x00 NOPX
+ 3838 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+ 3842 0x00 0x00 NOPX
+ 3844 0x00 0x00 NOPX
+ 3846 0x00 0x00 NOPX
+ 3848 0x00 0x00 NOPX
+ 3850 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 0x11 0x45 0xd1 0x98 SUB r2, r5, r29
+.delay_slot
+ 3876 0x19 0xa1 0x1c 0xf8 MOV r6, eh0
+.delay_slot
+ 3880 0x80 0x8e 0x30 0x00 0x01 0xa5 0x70 0x02 ST r3, [p4]; NOPM
+.delay_slot
+ 3888 0x00 0x2c 0xf0 0x00 0x24 0x16 0x11 0xbd 0xe3 0x7c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+ 3904 0x83 0x92 0xe0 0x00 0x42 0x08 0x07 0xec 0x58 0xba ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20
+ 3914 0xff 0x86 0x20 0x06 0x2d 0x70 0x48 0x08 0x58 0xba LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8
+ 3924 0x00 0xc8 0x2d 0x20 0x11 0x64 MOVX r3, #16; MOV r26, #4
+ 3930 0x00 0x00 NOPX
+ 3932 0x00 0x00 NOPX
+ 3934 0x00 0x00 NOPX
+ 3936 0x00 0x00 NOPX
+ 3938 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3942 0x00 0x00 NOPX
+ 3944 0x00 0x00 NOPX
+ 3946 0x00 0x00 NOPX
+ 3948 0x00 0x00 NOPX
+ 3950 0x00 0x00 NOPX
+ 3952 0x00 0x00 NOPX
+ 3954 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2
+ 3958 0x00 0x00 NOPX
+ 3960 0x00 0x00 NOPX
+ 3962 0x00 0x00 NOPX
+ 3964 0x00 0x00 NOPX
+ 3966 0x00 0x00 NOPX
+ 3968 0x00 0x00 NOPX
+ 3970 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2
+ 3974 0x00 0x00 NOPX
+ 3976 0x00 0x00 NOPX
+ 3978 0x00 0x00 NOPX
+ 3980 0x00 0x00 NOPX
+ 3982 0x00 0x00 NOPX
+ 3984 0x00 0x00 NOPX
+ 3986 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 3990 0x00 0x00 NOPX
+ 3992 0x00 0x00 NOPX
+ 3994 0x00 0x00 NOPX
+ 3996 0x00 0x00 NOPX
+ 3998 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 0x04 0x08 0x77 0x18 ST.s16 r3, [p4], m0
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 0x10 0x46 0x26 0x18 MAC r3, r3, r1, r2
+.delay_slot
+.swstall delay_slot
+ 4020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4022 0x00 0x00 NOPX
+.delay_slot
+ 4024 0x0c 0x04 0xd1 0x98 ST r6, [p4]
+.delay_slot
+ 4028 0x0c 0x16 0x51 0x98 ST r18, [p4, #4]
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+ 4032 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2
+ 4036 0x00 0x00 NOPX
+ 4038 0x00 0x00 NOPX
+ 4040 0x00 0x00 NOPX
+ 4042 0x00 0x00 NOPX
+ 4044 0x00 0x00 NOPX
+ 4046 0x00 0x00 NOPX
+ 4048 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2
+ 4052 0x00 0x00 NOPX
+ 4054 0x00 0x00 NOPX
+ 4056 0x00 0x00 NOPX
+ 4058 0x00 0x00 NOPX
+ 4060 0x00 0x00 NOPX
+ 4062 0x00 0x00 NOPX
+ 4064 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2
+ 4068 0x00 0x00 NOPX
+ 4070 0x00 0x00 NOPX
+ 4072 0x00 0x00 NOPX
+ 4074 0x07 0xfc 0x71 0x18 LDA r3, [sp, #-4]
+ 4078 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 0x00 0x00 0xf0 0xbf 0xc0 0x44 MOVXM r1, #65504
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 0x10 0x45 0xa0 0x98 ADD r2, r1, r26
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 0x18 0x44 0xc0 0xa0 0x41 0x64 MAC r1, r1, r3, r2; MOV r1, #16
+ 4104 0x00 0x00 NOPX
+ 4106 0x00 0x00 NOPX
+ 4108 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2
+ 4112 0x00 0x00 NOPX
+ 4114 0x00 0x00 NOPX
+ 4116 0x00 0x00 NOPX
+ 4118 0x00 0x00 NOPX
+ 4120 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 0x18 0x0f 0xd8 0xb8 MOV m0, #-20
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 0x04 0x08 0xb7 0x18 ST.s16 r5, [p4], m0
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 0xb1 0x49 0xc2 0xa0 0x41 0x64 MSC r5, r5, r22, r4; MOV r5, #16
+.delay_slot
+.swstall delay_slot
+ 4146 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4148 0x00 0x00 NOPX
+.delay_slot
+ 4150 0x0c 0x06 0x91 0x98 ST r20, [p4]
+.delay_slot
+ 4154 0x82 0xc6 0x30 0x01 0xa0 0x8b 0xd0 0x8e 0x79 0x3a ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+
+.text_segment PM 4176
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function_start
+ 4176 0x42 0x82 0xd0 0x3e 0x47 0xc8 0x87 0xe8 0x58 0xba LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24
+ 4186 0x45 0x86 0xd0 0x3e 0x27 0xaa 0x08 0x06 0x58 0xba LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6
+ 4196 0x4f 0x96 0xd0 0x01 0x80 0x08 0x68 0x60 0x78 0xba LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0
+ 4206 0x02 0x2a 0x52 0x98 LDA.s16 r18, [p2], m1
+ 4210 0x02 0x1c 0xd6 0x98 LDA r6, [p2], #4
+ 4214 0x02 0x2c 0xf6 0x98 LDA r7, [p2], #8
+ 4218 0x02 0x06 0x36 0x98 LDA r17, [p2]
+ 4222 0x10 0x26 0x4e 0x98 ASHL r19, r0, r4
+ 4226 0x02 0x24 0x96 0x98 LDA r4, [p2, #8]
+ 4230 0x11 0x68 0x2e 0x98 ASHL r20, r5, r2
+ 4234 0x18 0x49 0x72 0xf8 VBCST.16 x0, r18
+ 4238 0x00 0x00 NOPX
+ 4240 0x14 0xe5 0x4f 0x98 MUL r18, r19, r20
+ 4244 0x10 0x67 0x11 0x98 SUB r19, r1, r17
+ 4248 0x14 0xe7 0x2f 0x98 MUL r19, r19, r18
+ 4252 0x14 0x63 0x2f 0x98 MUL r17, r17, r18
+ 4256 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+ 4260 0xc4 0x23 0x34 0xc3 0x82 0xa4 GE r16, r24, r17; ADD.NC p2, r3, r16
+ 4266 0x80 0x08 0xa0 0x40 0x01 0x84 JNZ r16, #4416
+.delay_slot
+ 4272 0x18 0x00 0x92 0xf8 VMOV bmll0, x0
+.delay_slot
+.swstall delay_slot
+ 4276 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4282 0x00 0x00 NOPX
+ 4284 0x00 0x00 0x11 0xe2 0x60 0x44 MOVXM ls, #4400
+ 4290 0x00 0x00 0x16 0xe2 0x60 0x44 MOVXM le, #4400
+ 4296 0x00 0x2b 0x60 0x02 0xbc 0x50 0x70 0x02 NOPS; MOV lc, r17
+ 4304 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4400 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.loop_nesting 0
+ 4416 0x00 0x86 0x00 0x0b 0x00 0xfe 0x29 0xcc 0xa8 0xba MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6
+ 4426 0x04 0x62 0x32 0x87 0xff 0x24 SUB r17, r0, r17; ADD.NC dn1, r7, #-1
+ 4432 0x14 0x62 0x6d 0x98 LSHL r17, r17, r6
+ 4436 0x04 0x4e 0x32 0x11 0x10 0x24 SUB r17, r0, r7; ADD.NC m1, r17, #16
+ 4442 0x11 0xe1 0x0f 0x98 MUL r16, r7, r16
+ 4446 0x14 0x4c 0x6d 0x98 LSHL r6, r17, r6
+ 4450 0x81 0x85 0xd4 0xc3 0x32 0xa4 ASHL r6, r16, r2; ADD.NC p2, r3, r6
+ 4456 0x16 0x0e 0x69 0x98 GE r7, r24, r6
+ 4460 0x38 0x09 0x08 0x40 0x01 0x84 JNZ r7, #4624
+.delay_slot
+.swstall delay_slot
+ 4466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4474 0x00 0x00 NOPX
+ 4476 0x00 0x07 0x80 0x00 0x00 0x04 0x79 0x00 0x10 0xba MOVA dc1, #0; MOVXM ls, #4608
+ 4486 0x02 0x06 0x80 0x00 0x00 0x05 0xb9 0x00 0x10 0xba MOVA dj1, #16; MOVXM le, #4608
+ 4496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0x90 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV
+ 4512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4592 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4608 0x00 0x2c 0xf0 0x00 0x22 0x30 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.loop_nesting 0
+ 4624 0x7f 0xff 0xf3 0xbf 0xf0 0x44 MOVXM r7, #2147483640
+ 4630 0x11 0xce 0x44 0x98 AND r7, r7, r4
+ 4634 0x11 0x4e 0x71 0x98 SUB r7, r5, r7
+ 4638 0x11 0xce 0x0f 0x98 MUL r7, r7, r0
+ 4642 0x11 0x04 0x2e 0x98 ASHL r2, r4, r2
+ 4646 0x11 0x48 0x41 0x98 SUB r4, r5, r4
+ 4650 0x10 0x84 0x0f 0x98 MUL r2, r2, r0
+ 4654 0x11 0x00 0x0f 0x98 MUL r0, r4, r0
+ 4658 0x08 0x45 0xf3 0x20 0x05 0x64 MUL r1, r1, r2; MOV r6, #1
+ 4664 0x10 0x00 0x6d 0x98 LSHL r0, r0, r6
+ 4668 0xc0 0x03 0x34 0xc3 0x02 0xa4 GE r0, r24, r1; ADD.NC p2, r3, r0
+ 4674 0x00 0x09 0x70 0x40 0x01 0x84 JNZ r0, #4832
+.delay_slot
+.swstall delay_slot
+ 4680 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4682 0x00 0x00 NOPX
+.delay_slot
+ 4684 0x11 0xc8 0x6d 0x98 LSHL r4, r7, r6
+.delay_slot
+ 4688 0x18 0x02 0x08 0x18 ADD.NC m0, r4, #16
+.delay_slot
+ 4692 0x18 0x41 0x7f 0x98 ADD.NC dn0, r2, #-1
+ 4696 0x00 0x03 0x80 0x00 0x00 0x04 0x79 0x68 0x10 0xba MOVA dc0, #0; MOVXM ls, #4816
+ 4706 0x02 0x02 0x80 0x00 0x00 0x05 0xb9 0x68 0x10 0xba MOVA dj0, #16; MOVXM le, #4816
+ 4716 0x1d 0x70 0xa0 0xf8 MOV lc, r1
+ 4720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4784 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4800 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 4816 0x00 0x2c 0xf0 0x00 0x22 0x10 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.loop_nesting 0
+ 4832 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+ 4836 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4838 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4840 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4842 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4844 0x00 0x00 NOPX
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+
+.text_segment PM 4848
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function_start
+ 4848 0x18 0xd4 0xc0 0xf8 MOV r3, p2
+ 4852 0x6b 0x91 0x60 0x03 0xb0 0xcb 0x00 0x02 MOVS dn3, p7; ADD.NC p7, r3, #44
+ 4860 0x07 0x8c 0x1a 0x98 LDA.u16 r0, [p7], #-16
+ 4864 0x00 0x00 NOPX
+ 4866 0x00 0x00 NOPX
+ 4868 0x00 0x00 NOPX
+ 4870 0x00 0x00 NOPX
+ 4872 0x00 0x00 NOPX
+ 4874 0x00 0x00 NOPX
+ 4876 0x00 0x09 0xf0 0x40 0x01 0x84 JNZ r0, #5088
+.delay_slot
+ 4882 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 4886 0x18 0xc2 0x72 0xf8 VBCST.32 x1, r16
+.delay_slot
+.swstall delay_slot
+ 4890 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4892 0x00 0x00 NOPX
+.delay_slot
+ 4894 0x00 0x20 0x00 0x00 0x01 0xc4 PADDXM [sp], #256
+ 4900 0x1a 0x80 0x48 0xb8 MOV dj2, #36
+ 4904 0x02 0x40 0x36 0x98 LDA r1, [p2, dj2]
+ 4908 0x00 0x00 NOPX
+ 4910 0x00 0x00 NOPX
+ 4912 0x00 0x00 NOPX
+ 4914 0x00 0x00 NOPX
+ 4916 0x00 0x00 NOPX
+ 4918 0x00 0x00 NOPX
+ 4920 0x14 0x04 0x19 0x98 GE r2, r16, r1
+ 4924 0x10 0x09 0xf0 0x40 0x01 0x84 JNZ r2, #5088
+.delay_slot
+ 4930 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1
+.delay_slot
+.swstall delay_slot
+ 4934 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4940 0x00 0x00 NOPX
+ 4942 0x00 0x2c 0xf3 0x84 0x8b 0x00 0x00 0x04 0x79 0xe8 0x10 0x76 NOPA; MOVS p3, p1; MOVXM ls, #5072
+ 4954 0x00 0x00 0x16 0xe7 0xa0 0x44 MOVXM le, #5072
+ 4960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb8 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV
+ 4976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 4992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 5056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.loop_nesting 1
+.begin_of_loop
+.end_of_loop
+ 5072 0x00 0x2c 0xf0 0x00 0x23 0x1d 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.loop_nesting 0
+ 5088 0x1c 0x00 0x40 0xb8 MOV m4, #32
+ 5092 0x07 0x8a 0x3a 0x98 LDA.u16 r17, [p7], m4
+ 5096 0xff 0xda 0x5a 0x1f 0x19 0x54 LDA.s16 r22, [p7], #-2; MOV m5, #-58
+ 5102 0xf5 0x6b 0x51 0x00 0xb9 0x54 LDA.u16 r26, [p7], m5; MOV dj0, #46
+ 5108 0xe0 0x52 0x59 0xbd 0x81 0xd4 LDA.s16 r20, [p7, dj0]; MOV r19, p7
+ 5114 0xe0 0x4e 0x56 0xd3 0x38 0x14 LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56
+ 5120 0x03 0xde 0xb2 0x98 LDA.s16 r21, [p3], #-6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 0x03 0xff 0x9a 0x98 LDA.u16 r28, [p3], #-2
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 0x67 0xc6 0x50 0x1c 0x12 0x2c LDA.s16 r17, [p3], #6; MOVX r7, #2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 0x7e 0xca 0x50 0x3f 0x27 0xca 0x60 0x00 0x58 0xba LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 0xe0 0x1e 0x52 0x10 0x4b 0x23 0x29 0x6c 0xc8 0x01 0x58 0x76 LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 0x60 0xef 0x52 0x5a 0x0b 0x2c 0x73 0xec 0x48 0x3c 0x58 0x76 LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 0x08 0x0a 0x83 0x84 0x8b 0x29 0x43 0x6d 0x01 0xd0 0x78 0x76 MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 0x6a 0x12 0xb0 0x27 0x33 0x6e 0x85 0x10 0x78 0xba VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 0x15 0x41 0x30 0x2b 0x33 0x6f 0x04 0xd0 0x78 0xba VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 0x8c 0x4d 0xba 0xf2 0xfe 0x24 LSHL r17, r17, r6; ADD.NC lc, r18, #-2
+ 5206 0x94 0x4d 0xb1 0x11 0x41 0xe4 LSHL r17, r18, r6; MOV dj0, r17
+ 5212 0x19 0x01 0x30 0x10 0x4b 0x0e 0x63 0x6c 0x04 0xd0 0x78 0x76 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19
+ 5224 0x0b 0x81 0x67 0x03 0x20 0xe4 0x14 0x30 0x3d 0x4a MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2
+ 5234 0x1d 0x21 0x34 0x5b 0x0b 0x02 0x44 0x50 0x72 0xba VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17
+ 5244 0x03 0x31 0x33 0x93 0x01 0xd4 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 0x15 0x41 0x30 0x04 0x11 0x80 0x3d 0x62 VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 0x03 0x50 0x95 0x98 VLDA.2D bmll1, [p3], d2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 0x00 0x00 0x00 0x8f 0x4c 0x02 0x10 0x28 0x3d 0x5a MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 0x19 0x01 0x37 0x10 0x01 0xd4 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 0x08 0x06 0x80 0x00 0x24 0x84 0x8b 0x00 0x44 0x08 0x82 0x00 0x78 0xa1 0x81 0xeb MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 0x1d 0x21 0x30 0x00 0x21 0x5a 0x0b 0x00 0x00 0x05 0xba 0x90 0x10 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5312 0x00 0x19 0x89 0x98 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 0x6a 0x12 0xb0 0x00 0x20 0x00 0xad 0x8e 0x11 0x80 0x3d 0x66 VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 0x15 0x41 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x81 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 0x19 0x01 0x30 0x00 0x24 0x31 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0xa1 0x81 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 0x1d 0x21 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5424 0xec 0x07 0x50 0x00 0x00 0x0c 0xaf 0xc0 0x10 0xba LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 0x03 0x31 0x32 0x15 0x72 0xe2 0x11 0x80 0x3d 0x4a VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 0x18 0x41 0x72 0xf8 VBCST.16 x0, r16
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 0x10 0x28 0x3d 0x48 VADD.f dm0, dm1, dm2, r2
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1
+ 5458 0x00 0x00 NOPX
+ 5460 0x00 0x02 0x5f 0xf9 0x12 0x0c 0x3d 0x62 ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2
+ 5468 0x11 0x40 0x08 0x98 NE r0, r5, r0
+ 5472 0x00 0x0c 0x70 0x40 0x01 0x84 JNZ r0, #6368
+.delay_slot
+.swstall delay_slot
+ 5478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5482 0x00 0x00 NOPX
+.delay_slot
+ 5484 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1
+.delay_slot
+.swstall delay_slot
+ 5488 0x00 0x00 NOPX
+ 5490 0x46 0x9a 0xd0 0x14 0x1a 0x2c LDA r6, [p2, #12]; MOVX r5, #3
+ 5496 0x00 0x00 NOPX
+ 5498 0x00 0x00 NOPX
+ 5500 0x00 0x00 NOPX
+ 5502 0x00 0x00 NOPX
+ 5504 0x00 0x00 NOPX
+ 5506 0x00 0x00 NOPX
+ 5508 0x11 0x4e 0x69 0x98 GE r7, r5, r6
+ 5512 0x38 0x0e 0x40 0x40 0x01 0x84 JNZ r7, #7296
+.delay_slot
+ 5518 0x10 0x00 0x11 0x18 MOVX r0, #4
+.delay_slot
+.swstall delay_slot
+ 5522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5526 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5528 0x00 0x00 NOPX
+ 5530 0x11 0x8a 0x08 0x98 NE r5, r6, r0
+ 5534 0x28 0x0c 0xb8 0x40 0x01 0x84 JNZ r5, #6512
+.delay_slot
+.swstall delay_slot
+ 5540 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5542 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5544 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5548 0x00 0x00 NOPX
+ 5550 0x24 0x40 0xa9 0x83 0xc1 0xe4 MOVX r17, #257; MOV dc4, lr
+ 5556 0x00 0x00 0xfa 0xbf 0xfe 0x44 MOVXM r21, #65535
+ 5562 0x00 0x2c 0xf0 0x50 0x02 0x2c NOPA; MOVX r20, #0
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 5568 0x08 0x0a 0x82 0x83 0x0b 0x00 0x52 0x08 0x48 0x3c 0x58 0x76 MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60
+ 5580 0x48 0x1a 0x50 0x11 0x02 0x2c LDA.s16 r6, [p2, dj2]; MOVX r4, #32
+ 5586 0x00 0x00 NOPX
+ 5588 0x00 0x00 NOPX
+ 5590 0x00 0x00 NOPX
+ 5592 0x00 0x00 NOPX
+ 5594 0x00 0x00 NOPX
+ 5596 0x00 0x01 0x67 0x98 NOPA
+ 5600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0c 0x52 0xf4 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 5616 0x04 0x8a 0x87 0xfd 0xa5 0x80 0x01 0xf3 0xb2 0x78 0x10 0x76 MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168
+ 5628 0xe0 0xdc 0x57 0xfa 0x65 0x80 0x50 0x08 0x8b 0x39 0x78 0x76 LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5
+ 5640 0x48 0x1a 0xd7 0x84 0x8b 0x3f 0x67 0xe8 0x02 0x49 0x78 0x76 LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2
+ 5652 0x03 0xf8 0x00 0x02 0xd2 0x01 0x02 0x49 0x78 0xba MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2
+ 5662 0x02 0x19 0x00 0x00 0x00 0x04 0x7b 0x40 0x10 0xba MOVA r25, #16; MOVXM ls, #5760
+ 5672 0xff 0x94 0xb0 0x00 0x00 0x05 0xbc 0x60 0x10 0xba VLDA wl2, [sp, #-32]; MOVXM le, #6336
+ 5682 0x10 0x74 0x01 0x18 MOVX r26, #64
+ 5686 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 0x15 0xfa 0x80 0x18 MOVX crRnd, r23
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 0x08 0x02 0xc0 0x02 0xb9 0x80 0x00 0x02 VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 0x19 0xa0 0x92 0xf8 VMOV x3, x0
+ 5704 0x02 0xa6 0x92 0xe6 0x10 0x40 0x83 0x62 VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2
+ 5712 0x1b 0x2a 0x92 0xf8 VMOV x6, x5
+ 5716 0x00 0x00 NOPX
+ 5718 0x00 0x00 NOPX
+ 5720 0x00 0x00 NOPX
+ 5722 0x00 0x00 NOPX
+ 5724 0x09 0xc0 0x16 0x18 VCONV.bf16.fp32 wl3, bmll0
+ 5728 0x00 0x00 NOPX
+ 5730 0x10 0x06 0x83 0x48 VMSC.f dm0, dm0, x3, x4, r2
+ 5734 0x00 0x00 NOPX
+ 5736 0x00 0x00 NOPX
+ 5738 0x00 0x00 NOPX
+ 5740 0x00 0x00 NOPX
+ 5742 0x00 0x00 NOPX
+ 5744 0x00 0x2c 0xf0 0x00 0x22 0xc0 0x16 0x00 0x71 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.loop_nesting 1
+.begin_of_loop
+ 5760 0x23 0xbe 0x89 0xa5 0x25 0xf4 VLDB x7, [p1], #64; VMOV bmhh4, x9
+ 5766 0x1b 0xd6 0x92 0xf8 VMOV bmhh3, x11
+ 5770 0x1f 0x1e 0xc0 0xf8 MOV r28, p7
+ 5774 0x17 0x3b 0x84 0x98 AND r29, r28, r24
+ 5778 0xee 0xc9 0x5e 0x3d 0xe0 0x24 LT r27, r29, r4; ADD.NC r28, r29, #-32
+ 5784 0x15 0xbd 0xdd 0x98 LSHL r30, r22, r29
+ 5788 0x16 0xbf 0xd1 0x98 SUB r31, r26, r29
+ 5792 0x2f 0xbc 0x48 0x70 0xcd 0xa4 SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25
+ 5798 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+ 5802 0x1c 0x4e 0x22 0xf8 VMOV wl8, wh7
+ 5806 0x1d 0x4f 0x22 0xf8 VMOV wl10, wl7
+ 5810 0x1c 0x90 0x92 0xf8 VMOV bmhl4, x8
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 0x1b 0x94 0x92 0xf8 VMOV bmhl3, x10
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 0x02 0x12 0x8a 0xe6 0x13 0x28 0x3d 0x62 VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 0x19 0x0e 0x8a 0xf8 VMOV cml1, cmh3
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 0x04 0x4e 0x22 0xe6 0x12 0x50 0x3d 0x62 VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 0x1a 0x0e 0x92 0xf8 VMOV bmll2, x7
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 0x1c 0xc0 0x66 0xd8 VSHIFT x9, x8, x0, r25
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 0x01 0x10 0x92 0xe6 0x14 0x30 0x3d 0x62 VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 0x1c 0x12 0x92 0xf8 VMOV bmll4, x9
+ 5858 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3
+ 5862 0x1c 0xd1 0x22 0xf8 VMOV wl9, wl8
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 0x1c 0x48 0x66 0xd8 VSHIFT x8, x9, x0, r25
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 0x01 0x10 0x92 0xe6 0x11 0x64 0x3d 0x62 VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 0x1b 0x12 0x92 0xf8 VMOV bmll3, x9
+ 5882 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 0x1d 0x40 0x1e 0xd8 VSHIFT x10, x8, x0, r7
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 0x04 0x30 0x12 0xe6 0x12 0x4c 0x3d 0x62 VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 0x1b 0x14 0x92 0xf8 VMOV bmll3, x10
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 0x04 0x40 0x1e 0xc6 0x13 0x8c 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 0x1b 0x10 0x92 0xf8 VMOV bmll3, x8
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 0x1c 0x24 0x12 0xf8 VMOV x8, bmll1
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 0x04 0x40 0x1e 0xc6 0x11 0x30 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 0x04 0x40 0x02 0xc6 0x12 0x50 0x3d 0x62 VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+ 5946 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 0x1c 0x40 0x02 0xd8 VSHIFT x8, x8, x0, r0
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 0x05 0x24 0x12 0xe6 0x13 0x70 0x3d 0x62 VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 0x05 0x50 0x02 0xc6 0x10 0x30 0x3d 0x62 VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 0x1c 0x14 0x92 0xf8 VMOV bmll4, x10
+ 5978 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2
+ 5982 0x1d 0xe2 0x01 0xb8 VEXTRACT.32 r23, x8, #0, vaddSign0
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 0x1d 0x2c 0x12 0xf8 VMOV x10, bmll3
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 0xe2 0xd0 0x83 0x54 0x03 0x74 VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 0x1d 0xa0 0x12 0xf8 VMOV x11, bmll0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 0xe0 0xd4 0x8a 0xb4 0x06 0xb4 VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 0x1c 0xd4 0xa0 0x38 VSEL.32 x9, x10, x9, r20
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 0x1d 0x10 0xd1 0x78 VINSERT.32 x10, x2, #0, r6
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 0x1c 0x12 0xf1 0x78 VINSERT.32 x8, x2, #0, r23
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 0x1d 0xd3 0x22 0xf8 VMOV wl11, wl9
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 0x1d 0x93 0x22 0xf8 VMOV wh11, wl9
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 0x1c 0x15 0x22 0xf8 VMOV wh8, wl10
+ 6030 0x1c 0x5c 0x00 0x38 VSEL.32 x8, x11, x8, r16
+ 6034 0x1c 0x0c 0x08 0x38 VSEL.32 x8, x1, x8, r17
+ 6038 0x1b 0xc3 0xa8 0x38 VSEL.32 x7, x8, x7, r21
+ 6042 0x18 0x0e 0x92 0xf8 VMOV bmll0, x7
+ 6046 0x1c 0xac 0x92 0xf8 VMOV x9, x6
+ 6050 0x68 0x02 0xc0 0x01 0x07 0x49 0x70 0x02 VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7
+ 6058 0x1c 0x32 0x92 0xf8 VMOV x8, x9
+ 6062 0x05 0xbb 0xcd 0xed 0xea 0x0f 0x12 0x4c 0x83 0x5a LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 0x00 0x0b 0x3e 0x91 0x11 0xec 0xa1 0x62 SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 0x05 0xa5 0xe2 0x33 0x09 0x2f 0x10 0xec 0x61 0x5a SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 0x13 0xec 0x01 0x48 VMUL.f dm3, x6, x0, r2
+ 6094 0x00 0x00 NOPX
+ 6096 0x00 0x00 NOPX
+ 6098 0x0c 0xc1 0x16 0x18 VCONV.bf16.fp32 wl9, bmll2
+ 6102 0x00 0x00 NOPX
+ 6104 0x12 0x52 0x83 0x48 VMSC.f dm2, dm2, x9, x4, r2
+ 6108 0x00 0x00 NOPX
+ 6110 0x00 0x00 NOPX
+ 6112 0x00 0x00 NOPX
+ 6114 0x00 0x00 NOPX
+ 6116 0x00 0x00 NOPX
+ 6118 0x0c 0x41 0x16 0x18 VCONV.bf16.fp32 wl8, bmll2
+ 6122 0x00 0x00 NOPX
+ 6124 0x14 0xf0 0xa1 0x48 VMUL.f dm4, x8, x5, r2
+ 6128 0x12 0xf0 0x61 0x48 VMUL.f dm2, x8, x3, r2
+ 6132 0x00 0x00 NOPX
+ 6134 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 0x12 0xf2 0xa1 0x48 VMUL.f dm2, x9, x5, r2
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0
+ 6154 0x00 0x00 NOPX
+ 6156 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 0x12 0xe1 0x01 0x48 VMUL.f dm2, x0, x8, r2
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0
+ 6176 0x00 0x00 NOPX
+ 6178 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 0x11 0xf2 0x61 0x48 VMUL.f dm1, x9, x3, r2
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0
+ 6198 0x00 0x00 NOPX
+ 6200 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 0x11 0xf2 0x01 0x48 VMUL.f dm1, x9, x0, r2
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0
+ 6220 0x00 0x00 NOPX
+ 6222 0x00 0x00 NOPX
+ 6224 0x00 0x00 NOPX
+ 6226 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 0x1d 0x70 0x12 0xf8 VMOV lfl1, bmll4
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 0x12 0x88 0x3d 0x48 VADD.f dm2, dm4, dm2, r2
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 0x1c 0x15 0x92 0xf8 VMOV bmll4, lfl1
+ 6240 0x00 0x00 NOPX
+ 6242 0x00 0x00 NOPX
+ 6244 0x00 0x00 NOPX
+ 6246 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 0x1c 0x68 0x12 0xf8 VMOV lfh1, bmll2
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 0x12 0x44 0x3d 0x48 VADD.f dm2, dm2, dm1, r2
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 0x1a 0x11 0x92 0xf8 VMOV bmll2, lfh1
+ 6260 0x00 0x00 NOPX
+ 6262 0x00 0x00 NOPX
+ 6264 0x00 0x00 NOPX
+ 6266 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 0x1d 0x68 0x12 0xf8 VMOV lfl1, bmll2
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 0x10 0x20 0x3d 0x48 VADD.f dm0, dm1, dm0, r2
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 0x19 0x15 0x92 0xf8 VMOV bmll1, lfl1
+ 6280 0x00 0x00 NOPX
+ 6282 0x00 0x00 NOPX
+ 6284 0x00 0x00 NOPX
+ 6286 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 0x1c 0x60 0x12 0xf8 VMOV lfh1, bmll0
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 0x10 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r2
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 0x18 0x11 0x92 0xf8 VMOV bmll0, lfh1
+ 6300 0x00 0x00 NOPX
+ 6302 0x00 0x00 NOPX
+ 6304 0x00 0x00 NOPX
+ 6306 0x00 0x00 NOPX
+ 6308 0x0d 0xc0 0x16 0x18 VCONV.bf16.fp32 wl11, bmll0
+ 6312 0x00 0x00 NOPX
+ 6314 0x1d 0x85 0xfe 0xd8 VSHIFT x11, x0, x11, r31
+ 6318 0x1d 0xd5 0xcc 0x38 VSEL.8 x11, x10, x11, r19:r18
+ 6322 0x00 0x00 NOPX
+ 6324 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x8b 0x65 0x41 0x36 NOPA; NOPB; VST wh11, [p7, #32]; NOPX
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.end_of_loop
+ 6336 0x00 0x2c 0xf0 0x00 0x27 0x8a 0xea 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV
+.loop_nesting 0
+ 6352 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384
+.delay_slot
+.swstall delay_slot
+ 6358 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6360 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6362 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6364 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6366 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 0xff 0xb4 0xb0 0xb4 0x80 0x5c ST dn3, [sp, #-4]; MOVX vaddSign0, #1
+ 6374 0x00 0x2c 0xf7 0xf8 0x3d 0x80 0x00 0x00 0x00 0x7a NOPA; ST lr, [sp, #-8]; NOPX
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6384 0x1f 0x61 0x91 0x18 ADD.NC p7, r3, #34
+ 6388 0xe0 0x8f 0x5b 0x64 0xc1 0xd4 LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 0x18 0x7b 0x60 0xf8 MOV crSCDEn, crMCDEn
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 0x07 0x04 0x77 0x18 ST.s16 r3, [p7]
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 0x10 0xc6 0x07 0x18 ADD r3, r3, #1
+.delay_slot
+ 6418 0x00 0x2c 0xf0 0x00 0x20 0xc0 0xb0 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM
+.return_address
+ 6432 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8]
+ 6436 0x07 0xfc 0x99 0x18 LDA p1, [sp, #-4]
+ 6440 0x07 0x54 0x77 0x18 ST.s16 r3, [p7, #10]
+ 6444 0xff 0xe0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-256
+ 6450 0x00 0x00 NOPX
+ 6452 0x00 0x00 NOPX
+ 6454 0x00 0x00 NOPX
+ 6456 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 6460 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.delay_slot
+.swstall delay_slot
+ 6464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6470 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6480 0x1f 0xf4 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r20, #255; J #5568
+.delay_slot
+ 6490 0x10 0x2a 0x01 0x18 MOVX r21, #0
+.delay_slot
+.swstall delay_slot
+ 6494 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6500 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+ 6512 0x10 0x0a 0x15 0x18 MOVX r5, #5
+ 6516 0x11 0x4a 0x67 0x98 EQ r5, r5, r6
+ 6520 0x28 0x0e 0x30 0x40 0x01 0x84 JNZ r5, #7264
+.delay_slot
+.swstall delay_slot
+ 6526 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6534 0x00 0x00 NOPX
+ 6536 0x10 0x0e 0x19 0x18 MOVX r7, #6
+ 6540 0x11 0xce 0x67 0x98 EQ r7, r7, r6
+ 6544 0x38 0x0e 0xa8 0x40 0x01 0x84 JNZ r7, #7504
+.delay_slot
+ 6550 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.swstall delay_slot
+ 6554 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6556 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6558 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 6576 0x48 0x1e 0x50 0x01 0x10 0xea 0x60 0xf0 0x78 0xba LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr
+ 6586 0x89 0x8d 0x18 0xa4 0x05 0x64 NE r6, r17, r6; MOV r17, #257
+ 6592 0x30 0x0e 0x20 0x40 0x01 0x84 JNZ r6, #7232
+.delay_slot
+.swstall delay_slot
+ 6598 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6600 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6602 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6604 0x00 0x00 NOPX
+.delay_slot
+ 6606 0x11 0xca 0x5e 0x98 ASHL r5, r7, r5
+ 6610 0x04 0x8a 0x80 0x84 0x8b 0x00 0x00 0x04 0x7d 0x08 0x10 0x76 MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672
+ 6622 0x48 0x1e 0xd7 0xfd 0xa5 0x80 0x00 0x05 0xbd 0x38 0x10 0x76 LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768
+ 6634 0x00 0x1d 0x15 0x98 VLDA bmll2, [p0], #64
+ 6638 0x00 0x00 NOPX
+ 6640 0x1c 0xc2 0x92 0xf8 VMOV bmhh4, x1
+ 6644 0x00 0x00 NOPX
+ 6646 0x03 0x13 0x12 0xe6 0x11 0x68 0x3d 0x62 VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2
+ 6654 0x00 0x00 NOPX
+ 6656 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0xff 0xc8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.loop_nesting 1
+.begin_of_loop
+ 6672 0x03 0xa2 0xb0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+ 6720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x62 0x09 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x8b 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x89 0x89 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+ 6784 0x02 0x10 0x00 0x00 0x01 0xf3 0xb2 0x78 0x10 0xba MOVA r16, #16; MOVXM p7, #509168
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 0xe0 0x90 0x50 0x00 0x61 0x08 0x98 0x01 0x58 0xba LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 0x10 0x22 0x05 0x18 MOVX r17, #1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 0x1c 0xc4 0x12 0xf8 VMOV bmhh4, bmll1
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 0x19 0x33 0x12 0xf8 VMOV x2, bmhh4
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 0x19 0x10 0x12 0xd8 VSHIFT x2, x2, x0, r4
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 0x01 0x3a 0x80 0x00 0x49 0x2f 0x10 0x40 0x3d 0x5a MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 0x1a 0x13 0x12 0xf8 VMOV bmll2, bmhh4
+ 6838 0x19 0x16 0x72 0xf8 VBCST.32 x2, r5
+ 6842 0x19 0x04 0x92 0xf8 VMOV bmll1, x2
+ 6846 0x00 0x00 NOPX
+ 6848 0x1a 0x04 0x12 0xf8 VMOV bmll2, bmll1
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 0x19 0x20 0x12 0xf8 VMOV x2, bmll0
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 0x01 0x10 0x42 0xc6 0x10 0x0c 0x3d 0x62 VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 0x1b 0x04 0x92 0xf8 VMOV bmll3, x2
+ 6868 0x19 0x20 0x92 0xf8 VMOV x2, x0
+ 6872 0x00 0x00 NOPX
+ 6874 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1
+ 6878 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 0x01 0xa0 0x12 0xe6 0x14 0x40 0x83 0x62 VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 0x01 0x98 0x1a 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 0x1a 0x06 0x92 0xf8 VMOV bmll2, x3
+ 6900 0x19 0xa4 0x92 0xf8 VMOV x3, x2
+ 6904 0x00 0x00 NOPX
+ 6906 0x00 0x00 NOPX
+ 6908 0x09 0x42 0x16 0x18 VCONV.bf16.fp32 wl2, bmll4
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 0x1a 0xa0 0x12 0xf8 VMOV x5, bmll0
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 0x03 0x28 0x02 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 0x1a 0x0c 0x92 0xf8 VMOV bmll2, x6
+ 6928 0x1a 0xa6 0x92 0xf8 VMOV x5, x3
+ 6932 0x00 0x00 NOPX
+ 6934 0x00 0x00 NOPX
+ 6936 0x00 0x00 NOPX
+ 6938 0x1b 0x20 0x12 0xf8 VMOV x6, bmll0
+ 6942 0x18 0x1a 0x01 0xb8 VEXTRACT.32 r0, x6, #0, vaddSign0
+ 6946 0x00 0x00 NOPX
+ 6948 0x1b 0x00 0x11 0x78 VINSERT.32 x6, x0, #0, r0
+ 6952 0x18 0x8b 0x08 0x38 VSEL.32 x1, x1, x6, r17
+ 6956 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1
+ 6960 0x18 0xaa 0x92 0xf8 VMOV x1, x5
+ 6964 0x58 0x22 0xc0 0x01 0x91 0x49 0x70 0x02 VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1
+ 6972 0x00 0x00 NOPX
+ 6974 0x11 0x4a 0x83 0x48 VMSC.f dm1, dm2, x5, x4, r2
+ 6978 0x10 0xea 0x41 0x48 VMUL.f dm0, x5, x2, r2
+ 6982 0x00 0x00 NOPX
+ 6984 0x00 0x00 NOPX
+ 6986 0x00 0x00 NOPX
+ 6988 0x00 0x00 NOPX
+ 6990 0x08 0xc0 0x96 0x18 VCONV.bf16.fp32 wl1, bmll1
+ 6994 0x14 0x84 0x83 0x48 VMSC.f dm4, dm4, x2, x4, r2
+ 6998 0x13 0x22 0x83 0x48 VMSC.f dm3, dm1, x1, x4, r2
+ 7002 0x00 0x00 NOPX
+ 7004 0x00 0x00 NOPX
+ 7006 0x00 0x00 NOPX
+ 7008 0x00 0x00 NOPX
+ 7010 0x09 0xc2 0x16 0x18 VCONV.bf16.fp32 wl3, bmll4
+ 7014 0x0b 0x41 0x96 0x18 VCONV.bf16.fp32 wl6, bmll3
+ 7018 0x00 0x00 NOPX
+ 7020 0x12 0xec 0x61 0x48 VMUL.f dm2, x6, x3, r2
+ 7024 0x13 0xec 0x41 0x48 VMUL.f dm3, x6, x2, r2
+ 7028 0x00 0x00 NOPX
+ 7030 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 0x13 0xe2 0x61 0x48 VMUL.f dm3, x1, x3, r2
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7050 0x00 0x00 NOPX
+ 7052 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 0x13 0xea 0x61 0x48 VMUL.f dm3, x5, x3, r2
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7072 0x00 0x00 NOPX
+ 7074 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 0x13 0xe2 0x41 0x48 VMUL.f dm3, x1, x2, r2
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7094 0x00 0x00 NOPX
+ 7096 0x00 0x00 NOPX
+ 7098 0x00 0x00 NOPX
+ 7100 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7114 0x00 0x00 NOPX
+ 7116 0x00 0x00 NOPX
+ 7118 0x13 0xe0 0xc1 0x48 VMUL.f dm3, x0, x6, r2
+ 7122 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 0x13 0x4c 0x3d 0x48 VADD.f dm3, dm2, dm3, r2
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0
+ 7136 0x00 0x00 NOPX
+ 7138 0x00 0x00 NOPX
+ 7140 0x11 0xe2 0x01 0x48 VMUL.f dm1, x1, x0, r2
+ 7144 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 0x18 0x6c 0x12 0xf8 VMOV lfh0, bmll3
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 0x11 0x44 0x3d 0x48 VADD.f dm1, dm2, dm1, r2
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0
+ 7158 0x00 0x00 NOPX
+ 7160 0x00 0x00 NOPX
+ 7162 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 0x00 0x24 0x12 0xe6 0x10 0x40 0x3d 0x62 VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 0x02 0x00 0x92 0xe6 0x14 0xea 0x01 0x62 VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2
+ 7182 0x00 0x00 NOPX
+ 7184 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 0x10 0x50 0x3d 0x48 VADD.f dm0, dm2, dm4, r2
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0
+ 7196 0x00 0x00 NOPX
+ 7198 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384
+.delay_slot
+ 7204 0x0f 0xfa 0x65 0x98 ST dc4, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 7208 0x00 0x00 NOPX
+.delay_slot
+ 7210 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0
+.delay_slot
+.swstall delay_slot
+ 7214 0x00 0x00 NOPX
+.delay_slot
+ 7216 0x00 0x2c 0xf0 0x00 0x21 0x05 0x12 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+ 7232 0x00 0x14 0x00 0x00 0x02 0xbe 0x00 0x00 0x20 0xba MOVA r20, #0; J #5616
+.delay_slot
+ 7242 0x10 0x2a 0x01 0x18 MOVX r21, #0
+.delay_slot
+.swstall delay_slot
+ 7246 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7248 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7250 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7252 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 0x00 0x0e 0x90 0x00 0x00 0x84 J #7456
+.delay_slot
+ 7270 0xff 0x93 0xb0 0x02 0x60 0xf0 0x70 0x02 ST p1, [sp, #-4]; MOV dc4, lr
+.delay_slot
+.swstall delay_slot
+ 7278 0x00 0x00 NOPX
+.delay_slot
+ 7280 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256]
+.delay_slot
+ 7284 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192]
+.delay_slot
+ 7288 0xff 0x0e 0x60 0x00 0x01 0xa5 0x70 0x02 VST x1, [sp, #-128]; NOPM
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+ 7296 0x10 0x22 0x05 0x18 MOVX r17, #1
+ 7300 0x14 0x62 0x67 0x98 EQ r17, r17, r6
+ 7304 0x88 0x0e 0x90 0x40 0x01 0x84 JNZ r17, #7456
+.delay_slot
+.swstall delay_slot
+ 7310 0x00 0x00 NOPX
+.delay_slot
+ 7312 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256]
+.delay_slot
+ 7316 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192]
+.delay_slot
+ 7320 0x0f 0xf8 0x73 0x18 VST x1, [sp, #-128]
+.delay_slot
+ 7324 0xff 0x93 0xb0 0x00 0x70 0x4a 0x60 0xf0 0x79 0x3a ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr
+ 7334 0x11 0xce 0x67 0x98 EQ r7, r7, r6
+ 7338 0x38 0x0e 0x80 0x40 0x01 0x84 JNZ r7, #7424
+.delay_slot
+.swstall delay_slot
+ 7344 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7346 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7348 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7350 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7352 0x00 0x00 NOPX
+ 7354 0x11 0x4e 0x67 0x98 EQ r7, r5, r6
+ 7358 0x38 0x0e 0x70 0x40 0x01 0x84 JNZ r7, #7392
+.delay_slot
+ 7364 0x10 0x0a 0x41 0x18 MOVX r5, #16
+.delay_slot
+.swstall delay_slot
+ 7368 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7370 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7372 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7374 0x00 0x00 NOPX
+ 7376 0x00 0x0c 0xd8 0x00 0x00 0x84 J #6576
+.delay_slot
+.swstall delay_slot
+ 7382 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7384 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7386 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7390 0x00 0x00 NOPX
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+ 7392 0x20 0x31 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r17, #257; J #5568
+.delay_slot
+ 7402 0x05 0x40 0x28 0x00 0x41 0x64 MOVX r21, #0; MOV m4, #16
+.delay_slot
+ 7408 0x10 0x28 0x01 0x18 MOVX r20, #0
+.delay_slot
+.swstall delay_slot
+ 7412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7416 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480
+.delay_slot
+ 7430 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535
+.delay_slot
+ 7436 0x1c 0x00 0x20 0xb8 MOV m4, #16
+.delay_slot
+.swstall delay_slot
+ 7440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7444 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+ 7456 0xfe 0x07 0x70 0x00 0x02 0xb8 0x00 0x00 0x20 0xba VLDA x0, [sp, #-256]; J #5568
+.delay_slot
+ 7466 0xfe 0xa7 0x70 0x00 0x00 0x8a 0x88 0x00 0x58 0xba VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0
+.delay_slot
+ 7476 0xff 0x93 0x20 0x00 0x00 0x3e 0x0f 0xff 0x90 0xba LDA p1, [sp, #-4]; MOVXM r16, #65535
+.delay_slot
+ 7486 0x05 0x40 0x28 0x00 0x81 0x64 MOVX r21, #0; MOV m4, #32
+.delay_slot
+ 7492 0x11 0x22 0x05 0x18 MOVX r17, #257
+.delay_slot
+ 7496 0xff 0x0f 0x70 0x04 0x00 0x00 0x1c 0x22 VLDA x1, [sp, #-128]; NOPV
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480
+.delay_slot
+ 7510 0x1c 0xc1 0xe0 0xf8 MOV dc4, lr
+.delay_slot
+ 7514 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535
+.delay_slot
+ 7520 0x1c 0x00 0x20 0xb8 MOV m4, #16
+.delay_slot
+.swstall delay_slot
+ 7524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7526 0x00 0x00 NOPX
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+
+.text_segment PM 7536
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 7536 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 0xfd 0xf3 0xb0 0x00 0x01 0xf3 0xb2 0x60 0x11 0x3a ST p7, [sp, #-20]; MOVXM p7, #509120
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 0xe0 0xc2 0xd7 0xe7 0x1d 0x82 0x0d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 0xff 0x2e 0xb0 0x21 0x04 0x81 0x68 0xf0 0x79 0x3a ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 0xfe 0x3e 0xb8 0x47 0xf6 0x5c ST r15, [sp, #-16]; ADD r17, r16, #-2
+ 7578 0x0f 0xe9 0xb5 0x98 ST r13, [sp, #-24]
+ 7582 0x00 0x00 NOPX
+ 7584 0x00 0x00 NOPX
+ 7586 0x00 0x00 NOPX
+ 7588 0x80 0x0f 0xf0 0x40 0x01 0x84 JNZ r16, #8160
+.delay_slot
+ 7594 0x0f 0xfd 0x95 0x98 ST r12, [sp, #-4]
+.delay_slot
+ 7598 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12]
+.delay_slot
+ 7602 0x0f 0xe0 0x1d 0x98 ST p0, [sp, #-32]
+.delay_slot
+ 7606 0x00 0x07 0xcc 0xc9 0x90 0x44 MOVXM p6, #509128
+.delay_slot
+ 7612 0x0e 0x06 0x31 0x98 ST r17, [p6]
+ 7616 0x00 0x31 0x07 0x88 0x8b 0x00 0x01 0xf1 0x32 0x76 0x10 0x76 MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 0x40 0xc6 0x30 0x00 0x01 0xf1 0x32 0x78 0x11 0x3a ST r17, [p2]; MOVXM p2, #509168
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 0x40 0xc0 0xec 0xc5 0x81 0xd4 ST.s8 r16, [p2]; MOV p6, p1
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 0x00 0x05 0x08 0x00 0x01 0x04 JL #2576
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 0x00 0x07 0xc0 0xc8 0x80 0x44 MOVXM p0, #508992
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+.swstall delay_slot
+ 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.return_address
+ 7680 0x00 0x11 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r17, #0; MOVXM p2, #508992
+ 7690 0x40 0xba 0xd0 0x00 0x01 0xf1 0x32 0x64 0x10 0xba LDA r14, [p2]; MOVXM p2, #509128
+ 7700 0x40 0xca 0xd0 0x00 0x01 0xf1 0x32 0x22 0x10 0xba LDA r18, [p2]; MOVXM p2, #508996
+ 7710 0x43 0xb6 0xd0 0x00 0x01 0xf1 0xb2 0x68 0x10 0xba LDA r13, [p2], #4; MOVXM p3, #509136
+ 7720 0x42 0x85 0xd0 0x00 0x01 0xf0 0xb2 0x66 0x10 0xba LDA el0, [p2, #4]; MOVXM p1, #509132
+ 7730 0x40 0xbe 0xd8 0x39 0x81 0xd4 LDA r15, [p2]; MOV r16, p6
+ 7736 0x1a 0x68 0x14 0x18 ADD.NC p2, r16, #40
+ 7740 0x00 0x07 0xcc 0xca 0x00 0x44 MOVXM p6, #509184
+ 7746 0x00 0x07 0xc0 0xc9 0xd0 0x44 MOVXM p0, #509160
+ 7752 0x13 0xa5 0x2f 0x98 MUL r18, r14, r18
+ 7756 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648
+ 7762 0x60 0x85 0x36 0xca 0x5f 0x5c ST el0, [p3]; MUL r18, r13, r18
+ 7768 0x00 0x00 NOPX
+ 7770 0x13 0xe5 0x2f 0x98 MUL r18, r15, r18
+ 7774 0x00 0x00 NOPX
+ 7776 0x09 0x06 0x51 0x98 ST r18, [p1]
+ 7780 0x02 0x4c 0x2e 0x98 LDA el0, [p2], #16
+ 7784 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7788 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7792 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7796 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7800 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7804 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7808 0x08 0x04 0x29 0x98 ST el0, [p0]
+ 7812 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7816 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7820 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4
+ 7824 0x02 0xdc 0x36 0x98 LDA r1, [p2], #-12
+ 7828 0x00 0x00 NOPX
+ 7830 0x00 0x00 NOPX
+ 7832 0x00 0x00 NOPX
+ 7834 0x00 0x00 NOPX
+ 7836 0x00 0x00 NOPX
+ 7838 0x00 0x00 NOPX
+ 7840 0x10 0x63 0x0b 0x98 GEU r17, r1, r16
+ 7844 0x88 0x0f 0x78 0x40 0x01 0x84 JNZ r17, #7920
+.delay_slot
+ 7850 0x1b 0x1e 0xc0 0xf8 MOV r12, p7
+.delay_slot
+ 7854 0x0f 0xd9 0x1d 0x98 ST p2, [sp, #-40]
+.delay_slot
+.swstall delay_slot
+ 7858 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7860 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7862 0x00 0x00 NOPX
+.no_stack_arguments
+ 7864 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912
+.delay_slot
+ 7870 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36]
+.delay_slot
+.swstall delay_slot
+ 7874 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7876 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7878 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7880 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.return_address
+ 7888 0x00 0x0f 0x98 0x00 0x00 0x84 J #7984
+.delay_slot
+ 7894 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168
+.delay_slot
+.swstall delay_slot
+ 7900 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7902 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7904 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7906 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.no_stack_arguments
+ 7920 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912
+.delay_slot
+.swstall delay_slot
+ 7926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7932 0x00 0x01 0x67 0x98 NOPA
+.delay_slot
+ 7936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0x18 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV
+.return_address
+.no_stack_arguments
+ 7952 0x00 0x18 0x40 0x00 0x01 0x04 JL #12416
+.delay_slot
+ 7958 0x18 0x50 0x20 0xf8 MOV r1, r0
+.delay_slot
+ 7962 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168
+.delay_slot
+ 7968 0x4f 0x00 0x01 0x20 0x00 0x44 MOVXM r2, #1325400064
+.delay_slot
+ 7974 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36]
+.delay_slot
+.swstall delay_slot
+ 7978 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.return_address
+ 7984 0xe0 0xc0 0x50 0x02 0xd2 0x00 0x00 0x08 0xb8 0xba LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0
+ 7994 0xfb 0x40 0x80 0x01 0x80 0x08 0x00 0x49 0x78 0xba MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0
+ 8004 0xfb 0x23 0x20 0x00 0x01 0xf1 0xb2 0x6a 0x10 0xba LDA p2, [sp, #-40]; MOVXM p3, #509140
+ 8014 0x00 0x07 0xc2 0xc9 0xb0 0x44 MOVXM p1, #509144
+ 8020 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 0x06 0x1e 0x17 0x18 ST.s16 r16, [p6], #2
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 0x14 0x3a 0x80 0x18 MOVX crRnd, r16
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0
+ 8046 0x00 0x00 NOPX
+ 8048 0x00 0x00 NOPX
+ 8050 0x06 0x0b 0x07 0x18 ST.s8 r24, [p6], m0
+ 8054 0x00 0x00 NOPX
+ 8056 0x00 0x00 NOPX
+ 8058 0x00 0x00 NOPX
+ 8060 0x00 0x00 NOPX
+ 8062 0x00 0x00 NOPX
+ 8064 0x00 0x00 NOPX
+ 8066 0x0e 0x1d 0xd1 0x98 ST r14, [p6], #4
+ 8070 0x0e 0x05 0xf1 0x98 ST r15, [p6]
+ 8074 0x0e 0x15 0xb1 0x98 ST r13, [p6, #4]
+ 8078 0x02 0x1c 0x2e 0x98 LDA el0, [p2], #4
+ 8082 0x00 0x00 NOPX
+ 8084 0x00 0x00 NOPX
+ 8086 0x00 0x00 NOPX
+ 8088 0x00 0x00 NOPX
+ 8090 0x00 0x00 NOPX
+ 8092 0x00 0x00 NOPX
+ 8094 0x0b 0x04 0x29 0x98 ST el0, [p3]
+ 8098 0x02 0x04 0x2e 0x98 LDA el0, [p2]
+ 8102 0x00 0x00 NOPX
+ 8104 0x00 0x00 NOPX
+ 8106 0x00 0x00 NOPX
+ 8108 0x00 0x00 NOPX
+ 8110 0x00 0x00 NOPX
+ 8112 0x00 0x00 NOPX
+ 8114 0x09 0x04 0x29 0x98 ST el0, [p1]
+ 8118 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4]
+ 8122 0x00 0x00 NOPX
+ 8124 0x00 0x0f 0xf8 0x00 0x00 0x84 J #8176
+.delay_slot
+ 8130 0x00 0x07 0xc0 0xc9 0xb8 0x44 MOVXM p0, #509148
+.delay_slot
+.swstall delay_slot
+ 8136 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8138 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8140 0x00 0x01 0x67 0x98 NOPA
+.delay_slot
+ 8144 0x00 0x2c 0xf0 0x00 0x20 0x04 0x29 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+ 8160 0xfb 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x68 0x11 0x3a ST p2, [sp, #-36]; MOVXM p7, #509136
+ 8170 0x00 0x2c 0xf6 0x29 0x81 0xd4 NOPA; MOV r12, p2
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+ 8176 0xe0 0xc2 0xd0 0x44 0x0a 0x2c LDA r16, [p7]; MOVX r17, #1
+ 8182 0x00 0x00 NOPX
+ 8184 0x00 0x00 NOPX
+ 8186 0x00 0x00 NOPX
+ 8188 0x00 0x00 NOPX
+ 8190 0x00 0x00 NOPX
+ 8192 0x00 0x00 NOPX
+ 8194 0x14 0x63 0x08 0x98 NE r17, r17, r16
+ 8198 0x88 0x10 0x58 0x40 0x01 0x84 JNZ r17, #8368
+.delay_slot
+ 8204 0x1e 0x66 0x06 0x18 ADD.NC p6, r12, #12
+.delay_slot
+.swstall delay_slot
+ 8208 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8210 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8212 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8214 0x00 0x00 NOPX
+ 8216 0x00 0x07 0xc4 0xc9 0x88 0x44 MOVXM p2, #509124
+ 8222 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024
+ 8232 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8236 0x00 0x00 NOPX
+ 8238 0x00 0x00 NOPX
+.no_stack_arguments
+ 8240 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+ 8246 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 8250 0x00 0x00 NOPX
+.delay_slot
+ 8252 0x14 0x36 0xda 0x98 LT r27, r16, r13
+.delay_slot
+ 8256 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 8262 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 8272 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8278 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8282 0x80 0x10 0x50 0x40 0x01 0x84 JNZ r16, #8352
+.delay_slot
+.swstall delay_slot
+ 8288 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8292 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8294 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8296 0x00 0x00 NOPX
+ 8298 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6
+ 8304 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 8308 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 8312 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 0x02 0x46 0x36 0x98 LDA r17, [p2, #16]
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 8338 0x00 0x00 NOPX
+ 8340 0x00 0x00 NOPX
+ 8342 0x00 0x00 NOPX
+ 8344 0x00 0x00 NOPX
+ 8346 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 0x00 0x00 NOPX
+ 8354 0x00 0x00 NOPX
+ 8356 0x00 0x00 NOPX
+ 8358 0xe0 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p7]; NOPB; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+ 8368 0x10 0x1c 0x09 0x18 MOVX r14, #2
+ 8372 0x00 0x00 NOPX
+ 8374 0x00 0x00 NOPX
+ 8376 0x00 0x00 NOPX
+ 8378 0x00 0x00 NOPX
+ 8380 0x00 0x00 NOPX
+ 8382 0x13 0xa1 0x08 0x98 NE r16, r14, r16
+ 8386 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544
+.delay_slot
+.swstall delay_slot
+ 8392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8394 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8396 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8398 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8400 0x00 0x00 NOPX
+ 8402 0x00 0x07 0xc4 0xc9 0xc0 0x44 MOVXM p2, #509152
+ 8408 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024
+ 8418 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8422 0x00 0x00 NOPX
+ 8424 0x00 0x00 NOPX
+.no_stack_arguments
+ 8426 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+ 8432 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 8436 0x00 0x00 NOPX
+.delay_slot
+ 8438 0x14 0x36 0xda 0x98 LT r27, r16, r13
+.delay_slot
+ 8442 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 8448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV
+.return_address
+ 8464 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8470 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8474 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544
+.delay_slot
+.swstall delay_slot
+ 8480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8482 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8484 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8486 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8488 0x00 0x00 NOPX
+ 8490 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6
+ 8496 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 8500 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 8504 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 0x02 0x46 0x36 0x98 LDA r17, [p2, #16]
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 8530 0x00 0x00 NOPX
+ 8532 0x00 0x00 NOPX
+ 8534 0x00 0x00 NOPX
+ 8536 0x00 0x00 NOPX
+ 8538 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 0x00 0x00 NOPX
+ 8546 0x00 0x00 NOPX
+ 8548 0x00 0x00 NOPX
+ 8550 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x72 0x10 0xba LDA r16, [p7]; MOVXM p7, #509156
+ 8560 0x00 0x00 NOPX
+ 8562 0x00 0x00 NOPX
+ 8564 0x00 0x00 NOPX
+ 8566 0x00 0x00 NOPX
+ 8568 0x00 0x00 NOPX
+ 8570 0x10 0x24 0x11 0x18 MOVX r18, #4
+ 8574 0x14 0xa1 0x08 0x98 NE r16, r18, r16
+ 8578 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768
+.delay_slot
+ 8584 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+.swstall delay_slot
+ 8590 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8592 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8594 0x00 0x00 NOPX
+.delay_slot
+ 8596 0x10 0x22 0x01 0x18 MOVX r17, #0
+ 8600 0xe0 0xc2 0xd0 0x34 0x02 0x2c LDA r16, [p7]; MOVX r13, #0
+ 8606 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 8610 0x00 0x00 NOPX
+ 8612 0x00 0x00 NOPX
+.no_stack_arguments
+ 8614 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 8620 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8622 0x00 0x00 NOPX
+.delay_slot
+ 8624 0x14 0x37 0x1a 0x98 LT r27, r16, r17
+.delay_slot
+ 8628 0x8c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r17, r16; MOV r15, r27
+.delay_slot
+ 8634 0x00 0x2c 0xf8 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 8640 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15
+ 8646 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 8650 0x80 0x11 0x10 0x40 0x01 0x84 JNZ r16, #8736
+.delay_slot
+.swstall delay_slot
+ 8656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8658 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8660 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8662 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8664 0x00 0x00 NOPX
+ 8666 0xdf 0xee 0xd0 0x3f 0x17 0xea 0x08 0x01 0x58 0xba LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1
+ 8676 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4
+ 8680 0x06 0xfe 0x76 0x98 LDA r19, [p6], #-4
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 0x06 0x46 0x56 0x98 LDA r18, [p6, #16]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 0x0e 0x06 0x51 0x98 ST r18, [p6]
+ 8706 0x00 0x00 NOPX
+ 8708 0x00 0x00 NOPX
+ 8710 0x00 0x11 0x28 0x00 0x00 0x84 J #8784
+.delay_slot
+.swstall delay_slot
+ 8716 0x00 0x00 NOPX
+.delay_slot
+ 8718 0x14 0x93 0x18 0x18 ACQ r18, r17
+.delay_slot
+.swstall delay_slot
+ 8722 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8724 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8726 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 0x00 0x11 0x28 0x00 0x00 0x84 J #8784
+.delay_slot
+ 8742 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+.swstall delay_slot
+ 8746 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8748 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8750 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+ 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x28 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+ 8784 0xfc 0x73 0x20 0x00 0x01 0xf3 0x32 0x66 0x10 0xba LDA p7, [sp, #-32]; MOVXM p6, #509132
+ 8794 0xc0 0xd6 0xd0 0x00 0x01 0xf1 0x32 0x68 0x10 0xba LDA r21, [p6]; MOVXM p2, #509136
+ 8804 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p6, #509120
+ 8814 0x06 0x06 0x96 0x98 LDA r20, [p6]
+ 8818 0x00 0x00 NOPX
+ 8820 0x00 0x00 NOPX
+ 8822 0x00 0x00 NOPX
+ 8824 0x07 0x06 0x76 0x98 LDA r19, [p7]
+ 8828 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16
+ 8832 0x14 0x61 0x07 0x98 EQ r16, r17, r16
+ 8836 0x80 0x12 0x08 0x40 0x01 0x84 JNZ r16, #9232
+.delay_slot
+ 8842 0x15 0x28 0x07 0x18 ADD r20, r20, #1
+.delay_slot
+ 8846 0x0e 0x06 0x91 0x98 ST r20, [p6]
+.delay_slot
+.swstall delay_slot
+ 8850 0x00 0x00 NOPX
+.delay_slot
+ 8852 0x18 0x69 0xd5 0x58 ADD.NC p0, r19, r21
+.delay_slot
+ 8856 0xf7 0x83 0xb0 0x48 0x22 0x5c ST p0, [sp, #-68]; MOVX r18, #4
+ 8862 0x14 0x61 0x27 0x98 EQ r16, r17, r18
+ 8866 0x80 0x11 0xc0 0x40 0x01 0x84 JNZ r16, #9088
+.delay_slot
+.swstall delay_slot
+ 8872 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8874 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8876 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8878 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8880 0x00 0x00 NOPX
+ 8882 0x14 0x60 0xe8 0x98 NE r16, r17, r14
+ 8886 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040
+.delay_slot
+ 8892 0x00 0x07 0xcc 0xc9 0xc0 0x44 MOVXM p6, #509152
+.delay_slot
+.swstall delay_slot
+ 8898 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8900 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8902 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8904 0x00 0x00 NOPX
+ 8906 0xc0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r18, [p6]; MOVXM p6, #509000
+ 8916 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6a 0x10 0xba LDA r16, [p6]; MOVXM p6, #509140
+ 8926 0xc0 0xc6 0xd0 0x60 0x02 0x2c LDA r17, [p6]; MOVX r24, #0
+ 8932 0x00 0x00 NOPX
+ 8934 0x00 0x00 NOPX
+ 8936 0x00 0x00 NOPX
+ 8938 0x00 0x00 NOPX
+ 8940 0x00 0x00 NOPX
+ 8942 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16
+ 8946 0x00 0x00 NOPX
+ 8948 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16
+ 8954 0x15 0x37 0x1c 0x98 LTU r27, r20, r17
+ 8958 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27
+ 8962 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 8966 0x16 0x23 0x32 0x18 SEL.EQZ r17, r24, r19, r27
+ 8970 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 8974 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 8978 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344
+.delay_slot
+ 8984 0x00 0x07 0xcc 0xca 0x20 0x44 MOVXM p6, #509200
+.delay_slot
+ 8990 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 8994 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8996 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8998 0x00 0x00 NOPX
+ 9000 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200
+.delay_slot
+ 9006 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136
+.delay_slot
+ 9016 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992
+.delay_slot
+ 9026 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9030 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9032 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200
+.delay_slot
+ 9046 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136
+.delay_slot
+ 9056 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992
+.delay_slot
+ 9066 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+ 9088 0x00 0x0d 0x00 0x00 0x01 0xf3 0x32 0x72 0x10 0xba MOVA r13, #0; MOVXM p6, #509156
+ 9098 0xc0 0xca 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r18, [p6]; MOVXM p2, #508992
+ 9108 0x40 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6c 0x10 0xba LDA r16, [p2]; MOVXM p6, #509144
+ 9118 0xc0 0xc6 0xd0 0x3c 0x0a 0x2c LDA r17, [p6]; MOVX r15, #1
+ 9124 0x00 0x00 NOPX
+ 9126 0x00 0x00 NOPX
+ 9128 0x00 0x00 NOPX
+ 9130 0x00 0x00 NOPX
+ 9132 0x00 0x00 NOPX
+ 9134 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16
+ 9138 0x00 0x00 NOPX
+ 9140 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16
+ 9146 0x15 0x37 0x1c 0x98 LTU r27, r20, r17
+ 9150 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27
+ 9154 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 9158 0x13 0x63 0x32 0x18 SEL.EQZ r17, r13, r19, r27
+ 9162 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 9166 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 9170 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344
+.delay_slot
+ 9176 0x00 0x07 0xcc 0xca 0x30 0x44 MOVXM p6, #509208
+.delay_slot
+ 9182 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 9186 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9188 0x00 0x00 NOPX
+.delay_slot
+ 9190 0x00 0x2c 0xf0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba NOPA; MOVXM p7, #509136
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+ 9200 0xd1 0x81 0x60 0x00 0x04 0x98 0x00 0x00 0x21 0x3a MOVS p6, r12; J #9408
+.delay_slot
+ 9210 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.delay_slot
+.swstall delay_slot
+ 9214 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9216 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9218 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9220 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+ 9232 0x00 0x07 0xcc 0xc9 0x88 0x44 MOVXM p6, #509124
+ 9238 0xc0 0xce 0xd0 0x00 0x01 0xf3 0x32 0x22 0x10 0xba LDA r19, [p6]; MOVXM p6, #508996
+ 9248 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6e 0x10 0xba LDA r16, [p6]; MOVXM p6, #509148
+ 9258 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 9262 0x00 0x00 NOPX
+ 9264 0x00 0x00 NOPX
+ 9266 0x00 0x00 NOPX
+ 9268 0x00 0x00 NOPX
+ 9270 0x00 0x00 NOPX
+ 9272 0x14 0xe7 0x0f 0x98 MUL r19, r19, r16
+ 9276 0x00 0x00 NOPX
+ 9278 0x95 0x26 0x3a 0xb3 0x82 0xa4 SUB r20, r18, r19; ADD.NC r21, r19, r16
+ 9284 0x15 0x77 0x2c 0x98 LTU r27, r21, r18
+ 9288 0x15 0x29 0x02 0x18 SEL.EQZ r20, r20, r16, r27
+ 9292 0x9e 0xe5 0x98 0xa0 0x01 0x64 LTU r27, r19, r18; MOV r17, #0
+ 9298 0x14 0x63 0x42 0x18 SEL.EQZ r17, r17, r20, r27
+ 9302 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+ 9306 0x14 0x61 0x07 0x98 EQ r16, r17, r16
+ 9310 0x80 0x13 0xe0 0x40 0x01 0x84 JNZ r16, #10176
+.delay_slot
+ 9316 0x00 0x07 0xcc 0xca 0x40 0x44 MOVXM p6, #509216
+.delay_slot
+ 9322 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+.swstall delay_slot
+ 9326 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9328 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 0xf7 0x83 0x26 0x8c 0x0b 0x00 0xe0 0x49 0xe8 0x01 0x58 0x76 LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 0x07 0xbc 0x99 0x18 LDA p1, [sp, #-68]
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 0x00 0x08 0x28 0x00 0x01 0x04 JL #4176
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+ 9374 0x10 0x1a 0x01 0x18 MOVX r13, #0
+.delay_slot
+.swstall delay_slot
+ 9378 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9380 0x00 0x00 NOPX
+.delay_slot
+ 9382 0x00 0x2c 0xf0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba NOPA; MOVXM p2, #509184
+.return_address
+ 9392 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x20 0x11 0x3a MOVS p0, p7; MOVXM p2, #508992
+ 9402 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 9408 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20
+.no_stack_arguments
+ 9412 0x00 0x09 0x78 0x00 0x01 0x04 JL #4848
+.delay_slot
+.swstall delay_slot
+ 9418 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9420 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9422 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.return_address
+ 9440 0x07 0x06 0x16 0x98 LDA r16, [p7]
+ 9444 0x00 0x00 NOPX
+ 9446 0x00 0x00 NOPX
+ 9448 0x00 0x00 NOPX
+ 9450 0x00 0x00 NOPX
+ 9452 0x00 0x00 NOPX
+ 9454 0x00 0x00 NOPX
+ 9456 0x13 0xe3 0x08 0x98 NE r17, r15, r16
+ 9460 0x88 0x12 0xe0 0x40 0x01 0x84 JNZ r17, #9664
+.delay_slot
+.swstall delay_slot
+ 9466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9474 0x00 0x00 NOPX
+ 9476 0x00 0x07 0xce 0xc9 0x88 0x44 MOVXM p7, #509124
+ 9482 0xe0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p2, #509024
+ 9492 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 9496 0x00 0x00 NOPX
+ 9498 0x00 0x00 NOPX
+ 9500 0x00 0x00 NOPX
+.no_stack_arguments
+ 9502 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9508 0x00 0x00 NOPX
+.delay_slot
+ 9510 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9514 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13
+.delay_slot
+ 9520 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27
+.delay_slot
+ 9526 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 9536 0xfb 0xa3 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15
+ 9546 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9550 0x80 0x12 0xd0 0x40 0x01 0x84 JNZ r16, #9632
+.delay_slot
+ 9556 0x10 0x1e 0x05 0x18 MOVX r15, #1
+.delay_slot
+.swstall delay_slot
+ 9560 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9562 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9564 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9566 0x00 0x00 NOPX
+ 9568 0x4a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p2, #20]; ST r13, [p7]
+ 9574 0x00 0x00 NOPX
+ 9576 0x00 0x00 NOPX
+ 9578 0x00 0x00 NOPX
+ 9580 0x00 0x00 NOPX
+ 9582 0x00 0x00 NOPX
+ 9584 0x00 0x00 NOPX
+ 9586 0x14 0x10 0xf8 0x18 REL r16, r15
+ 9590 0xdc 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba LDA r16, [p6, #-8]; MOVXM p7, #509136
+ 9600 0x00 0x00 NOPX
+ 9602 0x00 0x00 NOPX
+ 9604 0x00 0x12 0xd8 0x00 0x00 0x84 J #9648
+.delay_slot
+.swstall delay_slot
+ 9610 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9614 0x00 0x00 NOPX
+.delay_slot
+ 9616 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+.delay_slot
+ 9620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x73 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p6, #-8]; NOPX
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+ 9632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf3 0xb2 0x68 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+ 9648 0xe0 0xc2 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+ 9664 0x10 0x22 0x01 0x18 MOVX r17, #0
+ 9668 0x00 0x00 NOPX
+ 9670 0x00 0x00 NOPX
+ 9672 0x00 0x00 NOPX
+ 9674 0x00 0x00 NOPX
+ 9676 0x00 0x00 NOPX
+ 9678 0x13 0xa1 0x08 0x98 NE r16, r14, r16
+ 9682 0x80 0x13 0x48 0x40 0x01 0x84 JNZ r16, #9872
+.delay_slot
+ 9688 0x00 0x07 0xce 0xc9 0xc0 0x44 MOVXM p7, #509152
+.delay_slot
+ 9694 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+.swstall delay_slot
+ 9700 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9702 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9704 0x00 0x00 NOPX
+ 9706 0x07 0x06 0x16 0x98 LDA r16, [p7]
+ 9710 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2]
+ 9714 0x00 0x00 NOPX
+ 9716 0x00 0x00 NOPX
+ 9718 0x00 0x00 NOPX
+.no_stack_arguments
+ 9720 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9726 0x00 0x00 NOPX
+.delay_slot
+ 9728 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9732 0xe0 0xc2 0x38 0x6e 0x35 0x5c ST r16, [p7]; LT r27, r16, r17
+.delay_slot
+ 9738 0x8c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r17, r16; MOV r14, r27
+.delay_slot
+ 9744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV
+.return_address
+ 9760 0xfb 0x93 0x20 0x1b 0x01 0x8f 0x6b 0x90 0x78 0xba LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14
+ 9770 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9774 0x80 0x13 0x38 0x40 0x01 0x84 JNZ r16, #9840
+.delay_slot
+ 9780 0x00 0x07 0xc4 0xc9 0xa0 0x44 MOVXM p2, #509136
+.delay_slot
+.swstall delay_slot
+ 9786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9788 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9790 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9792 0x00 0x00 NOPX
+ 9794 0x2a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p1, #20]; ST r13, [p7]
+ 9800 0x00 0x00 NOPX
+ 9802 0x00 0x00 NOPX
+ 9804 0x00 0x00 NOPX
+ 9806 0x00 0x00 NOPX
+ 9808 0x00 0x00 NOPX
+ 9810 0x00 0x00 NOPX
+ 9812 0x14 0x10 0xf8 0x18 REL r16, r15
+ 9816 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8]
+ 9820 0x00 0x00 NOPX
+ 9822 0x00 0x00 NOPX
+ 9824 0x00 0x00 NOPX
+ 9826 0x00 0x00 NOPX
+ 9828 0x00 0x00 NOPX
+ 9830 0x00 0x00 NOPX
+ 9832 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 9836 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8]
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 0x00 0x13 0x50 0x00 0x00 0x84 J #9888
+.delay_slot
+ 9846 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.delay_slot
+.swstall delay_slot
+ 9850 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9852 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9854 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9856 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+ 9872 0xfb 0xf3 0x20 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0x32 0x68 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+ 9888 0x40 0xc2 0xd0 0x44 0x22 0x2c LDA r16, [p2]; MOVX r17, #4
+ 9894 0x00 0x00 NOPX
+ 9896 0x00 0x00 NOPX
+ 9898 0x00 0x00 NOPX
+ 9900 0x00 0x00 NOPX
+ 9902 0x00 0x00 NOPX
+ 9904 0x00 0x00 NOPX
+ 9906 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 9910 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064
+.delay_slot
+ 9916 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156
+.delay_slot
+.swstall delay_slot
+ 9922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9928 0x00 0x00 NOPX
+ 9930 0x40 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2]; MOVXM p1, #509024
+ 9940 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1]
+ 9944 0x00 0x00 NOPX
+ 9946 0x00 0x00 NOPX
+ 9948 0x00 0x00 NOPX
+.no_stack_arguments
+ 9950 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224
+.delay_slot
+.swstall delay_slot
+ 9956 0x00 0x00 NOPX
+.delay_slot
+ 9958 0x14 0x20 0x07 0x18 ADD r16, r16, #1
+.delay_slot
+ 9962 0x40 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p2]; LT r27, r16, r13
+.delay_slot
+ 9968 0x6c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r13, r16; MOV r14, r27
+.delay_slot
+ 9974 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27
+.return_address
+ 9984 0x6c 0x06 0x3d 0xae 0x41 0xe4 SUB r16, r13, r3; MOV r27, r14
+ 9990 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27
+ 9994 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064
+.delay_slot
+ 10000 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156
+.delay_slot
+.swstall delay_slot
+ 10006 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10008 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10012 0x00 0x00 NOPX
+ 10014 0xea 0xc2 0xd4 0x0b 0x63 0x0c LDA r16, [p7, #20]; ST r13, [p2]
+ 10020 0x00 0x00 NOPX
+ 10022 0x00 0x00 NOPX
+ 10024 0x00 0x00 NOPX
+ 10026 0x00 0x00 NOPX
+ 10028 0x00 0x00 NOPX
+ 10030 0x00 0x00 NOPX
+ 10032 0x14 0x10 0xf8 0x18 REL r16, r15
+ 10036 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8]
+ 10040 0x00 0x00 NOPX
+ 10042 0x00 0x00 NOPX
+ 10044 0x00 0x00 NOPX
+ 10046 0x00 0x00 NOPX
+ 10048 0x00 0x00 NOPX
+ 10050 0x00 0x00 NOPX
+ 10052 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 10056 0xdc 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p6, #-8]; NOPM
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+ 10064 0x00 0x07 0xcc 0xc9 0x80 0x44 MOVXM p6, #509120
+ 10070 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x74 0x10 0xba LDA r16, [p6]; MOVXM p2, #509160
+ 10080 0x02 0x06 0x36 0x98 LDA r17, [p2]
+ 10084 0x00 0x00 NOPX
+ 10086 0x00 0x00 NOPX
+ 10088 0x00 0x00 NOPX
+ 10090 0x00 0x00 NOPX
+ 10092 0x00 0x00 NOPX
+ 10094 0x00 0x00 NOPX
+ 10096 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 10100 0x80 0x13 0xc8 0x40 0x01 0x84 JNZ r16, #10128
+.delay_slot
+ 10106 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20]
+.delay_slot
+ 10110 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16]
+.delay_slot
+ 10114 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12]
+.delay_slot
+.swstall delay_slot
+ 10118 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10120 0x00 0x00 NOPX
+ 10122 0x00 0x2c 0xfc 0x0b 0x63 0x0c NOPA; ST r13, [p6]
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 0xff 0x2e 0x2e 0xeb 0x41 0xd4 LDA r11, [sp, #-8]; MOV lr, r11
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 0x07 0xfd 0x91 0x18 LDA r12, [sp, #-4]
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 0x07 0xe9 0xb1 0x18 LDA r13, [sp, #-24]
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 0x1e 0x66 0x20 0xf8 MOV p6, r12
+.delay_slot
+ 10150 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 10156 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10158 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+ 10176 0x00 0x0d 0x06 0x8c 0x0b 0x00 0x04 0x98 0x00 0x00 0x20 0x76 MOVA r13, #0; MOVS p6, r12; J #9408
+.delay_slot
+ 10188 0x03 0xc0 0xa7 0x20 0x09 0x64 MOVX r15, #1; MOV r14, #2
+.delay_slot
+ 10194 0x00 0x07 0xc4 0xc8 0x80 0x44 MOVXM p2, #508992
+.delay_slot
+ 10200 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136
+.delay_slot
+ 10206 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28]
+.delay_slot
+.swstall delay_slot
+ 10210 0x00 0x00 NOPX
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 10224
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function_start
+ 10224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0
+ 10230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 10342 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 10362 0x18 0x9f 0xa0 0xf8 MOV r2, r31
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+
+.text_segment PM 10368
+.label _ZL19propagateFloat32NaNjj
+.function_start
+ 10368 0xfd 0x43 0x00 0x3f 0xc0 0x02 0x48 0x00 0x10 0xba MOVA r3, #-22; MOVXM r18, #-16777216
+ 10378 0x3f 0xe7 0x00 0x00 0x10 0x00 0x08 0x00 0x10 0xba MOVA r7, #511; MOVXM r0, #4194304
+ 10388 0x00 0x30 0x00 0x02 0x40 0x2c 0xa9 0xfe 0x58 0xba MOVA r16, #1; OR r4, r1, r0; MOV r5, #510
+ 10398 0x10 0x80 0x05 0x98 OR r0, r2, r0
+ 10402 0x10 0x4c 0x3d 0x98 LSHL r6, r1, r3
+ 10406 0x10 0x86 0x3d 0x98 LSHL r3, r2, r3
+ 10410 0x11 0xc6 0x34 0x98 AND r3, r7, r3
+ 10414 0x11 0xcc 0x64 0x98 AND r6, r7, r6
+ 10418 0x11 0x4c 0x67 0x98 EQ r6, r5, r6
+ 10422 0x10 0xa3 0x0d 0x98 LSHL r17, r2, r16
+ 10426 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17
+ 10430 0x11 0x22 0x02 0x18 SEL.EQZ r17, r4, r0, r27
+ 10434 0x00 0x3f 0xf8 0x3f 0xfe 0x44 MOVXM r16, #4194303
+ 10440 0x10 0x85 0x04 0x98 AND r2, r2, r16
+ 10444 0x10 0x84 0xf0 0x18 NEZ r2, r2
+ 10448 0x10 0x43 0x04 0x98 AND r1, r1, r16
+ 10452 0x10 0x42 0xf0 0x18 NEZ r1, r1
+ 10456 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10460 0x10 0x76 0x64 0x98 AND r27, r1, r6
+.delay_slot
+ 10464 0x10 0xc2 0x57 0x98 EQ r1, r3, r5
+.delay_slot
+ 10468 0x14 0x46 0x42 0x18 SEL.EQZ r3, r17, r4, r27
+.delay_slot
+ 10472 0x10 0x76 0x24 0x98 AND r27, r1, r2
+.delay_slot
+ 10476 0x10 0xc0 0x02 0x18 SEL.EQZ r0, r3, r0, r27
+.label _ZL19propagateFloat32NaNjj__end
+.label _ZL19roundAndPackFloat32iij
+.function_start
+ 10480 0x08 0x00 0x00 0x00 0x01 0xf0 0x32 0x7a 0x10 0xba MOVA r0, #64; MOVXM p0, #509172
+ 10490 0x00 0x92 0xd0 0x99 0xfa 0x2c LDA r4, [p0]; MOVX r6, #127
+.swstall __RAW__R_1948
+ 10496 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10498 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10500 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10502 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10504 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 10506 0x00 0x00 NOPX
+ 10508 0x20 0x14 0xa8 0x00 0x01 0x84 JZ r4, #10576
+.delay_slot
+ 10514 0x10 0x4a 0x01 0x18 MOVX r5, #64
+.delay_slot
+.swstall delay_slot
+ 10518 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10520 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10524 0x00 0x00 NOPX
+ 10526 0x00 0x70 0x00 0x00 0x70 0x4b 0x08 0x00 0x58 0xba MOVA r16, #3; MOVX r7, #2; MOV r24, #0
+ 10536 0x3e 0xc8 0xf2 0xa0 0x05 0x64 EQ r27, r7, r4; MOV r5, #1
+ 10542 0x11 0x8f 0x82 0x18 SEL.EQZ r7, r6, r24, r27
+ 10546 0x11 0x37 0x07 0x98 EQ r27, r4, r16
+ 10550 0x34 0x30 0x4d 0xa1 0x41 0xe4 SEL.EQZ r16, r6, r24, r27; MOV r27, r1
+ 10556 0x14 0x0e 0x72 0x18 SEL.EQZ r7, r16, r7, r27
+ 10560 0x11 0x76 0x47 0x98 EQ r27, r5, r4
+ 10564 0x00 0x2c 0xf0 0x00 0x20 0x0e 0x5c 0x10 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+ 10576 0x14 0x96 0x08 0x23 0xf5 0x64 EXTEND.u16 r18, r2; MOV r16, #253
+ 10582 0x14 0xa5 0x0a 0x98 LT r18, r18, r16
+ 10586 0x90 0x15 0x08 0x40 0x01 0x84 JNZ r18, #10768
+.delay_slot
+ 10592 0x10 0xe2 0x64 0x98 AND r17, r3, r6
+.delay_slot
+ 10596 0x10 0x0e 0x7d 0x18 MOVX r7, #31
+.delay_slot
+ 10600 0x10 0x42 0x7d 0x98 LSHL r1, r1, r7
+.delay_slot
+.swstall delay_slot
+ 10604 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10606 0x00 0x00 NOPX
+ 10608 0x00 0x12 0x00 0x05 0x38 0x3e 0x88 0xca 0xa8 0xba MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5
+ 10618 0x15 0x29 0x2a 0x98 LT r20, r20, r18
+ 10622 0x14 0x20 0x2a 0x98 LT r16, r16, r2
+ 10626 0x14 0xe7 0x44 0x98 AND r19, r19, r20
+ 10630 0x14 0xe7 0x05 0x98 OR r19, r19, r16
+ 10634 0x98 0x15 0x30 0x40 0x01 0x84 JNZ r19, #10848
+.delay_slot
+ 10640 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+.swstall delay_slot
+ 10644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10646 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10648 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10650 0x00 0x00 NOPX
+ 10652 0x10 0xa7 0x09 0x98 GE r19, r2, r16
+ 10656 0x98 0x15 0x10 0x40 0x01 0x84 JNZ r19, #10784
+.delay_slot
+.swstall delay_slot
+ 10662 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10664 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10666 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10670 0x00 0x00 NOPX
+ 10672 0x14 0x04 0x21 0x98 SUB r2, r16, r2
+ 10676 0x10 0x14 0xf8 0x00 0x01 0x84 JZ r2, #10736
+.delay_slot
+.swstall delay_slot
+ 10682 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10684 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10686 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10688 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10690 0x00 0x00 NOPX
+ 10692 0x84 0x44 0x39 0xa0 0x81 0x64 SUB r17, r16, r2; MOV r19, #32
+ 10698 0x11 0xcf 0x14 0x98 AND r7, r7, r17
+ 10702 0x10 0xce 0x7d 0x98 LSHL r7, r3, r7
+ 10706 0x10 0xe3 0x1d 0x98 LSHL r17, r3, r17
+ 10710 0x10 0xb7 0x3a 0x98 LT r27, r2, r19
+ 10714 0x11 0xce 0xf0 0x18 NEZ r7, r7
+ 10718 0x10 0xc6 0xf0 0x18 NEZ r3, r3
+ 10722 0x11 0xc5 0x15 0x98 OR r2, r7, r17
+ 10726 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0xc6 0x22 0x7a NOPA; NOPS; SEL.EQZ r3, r3, r2, r27
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 0x00 0x15 0x10 0x00 0x00 0x84 J #10784
+.delay_slot
+ 10742 0x10 0xe2 0x64 0x98 AND r17, r3, r6
+.delay_slot
+ 10746 0x10 0x04 0x01 0x18 MOVX r2, #0
+.delay_slot
+.swstall delay_slot
+ 10750 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10754 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+ 10768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+ 10784 0xff 0x20 0x00 0x22 0x30 0x34 0xa8 0xca 0xa8 0xba MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5
+ 10794 0x02 0xe3 0x00 0x06 0x62 0x2c 0x8f 0xff 0x58 0xba MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1
+ 10804 0x11 0x8c 0xd0 0x18 EQZ r6, r6
+ 10808 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0
+ 10812 0x11 0x88 0x46 0x98 XOR r4, r6, r4
+ 10816 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10820 0x11 0x36 0x04 0x98 AND r27, r4, r0
+.delay_slot
+ 10824 0x14 0x04 0x22 0x18 SEL.EQZ r2, r16, r2, r27
+.delay_slot
+ 10828 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3
+.delay_slot
+ 10832 0x10 0x44 0x20 0x98 ADD r2, r1, r2
+.delay_slot
+ 10836 0x00 0x2c 0xf0 0x00 0x20 0x36 0x01 0x04 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; ADD r0, r27, r2
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+ 10848 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10852 0x7f 0x80 0x01 0x20 0x00 0x44 MOVXM r2, #2139095040
+.delay_slot
+ 10858 0x10 0x46 0x20 0x98 ADD r3, r1, r2
+.delay_slot
+ 10862 0x11 0x44 0xd0 0x18 EQZ r2, r5
+.delay_slot
+ 10866 0x10 0xc0 0x21 0x98 SUB r0, r3, r2
+.delay_slot
+.swstall delay_slot
+ 10870 0x00 0x00 NOPX
+.label _ZL19roundAndPackFloat32iij__end
+
+.text_segment PM 10880
+.label _ZL28normalizeRoundAndPackFloat32iij
+.tail_call
+.function_start
+ 10880 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+ 10886 0x10 0xe0 0x30 0x18 CLZ r16, r3
+.delay_slot
+ 10890 0x14 0x21 0xff 0x18 ADD r16, r16, #-1
+.delay_slot
+ 10894 0x10 0x85 0x01 0x98 SUB r2, r2, r16
+.delay_slot
+ 10898 0x10 0xc7 0x0d 0x98 LSHL r3, r3, r16
+.delay_slot
+.swstall delay_slot
+ 10902 0x00 0x00 NOPX
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+
+.text_segment PM 10912
+.label int32_to_float32
+.function_start
+ 10912 0x08 0x15 0x78 0x00 0x01 0x84 JZ r1, #10992
+.delay_slot
+.swstall delay_slot
+ 10918 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10920 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10926 0x00 0x00 NOPX
+ 10928 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648
+ 10934 0x10 0x61 0x07 0x98 EQ r16, r1, r16
+ 10938 0x80 0x15 0x80 0x40 0x01 0x84 JNZ r16, #11008
+.delay_slot
+.swstall delay_slot
+ 10944 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10946 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10948 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10950 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10952 0x00 0x00 NOPX
+.tail_call
+ 10954 0x13 0x82 0x00 0x00 0x05 0x50 0x00 0x00 0x20 0xba MOVA r2, #156; J #10880
+.delay_slot
+ 10964 0x10 0x47 0x10 0x18 ABS r3, r1
+.delay_slot
+ 10968 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 10972 0x10 0x43 0x0a 0x98 LT r1, r1, r16
+.delay_slot
+.swstall delay_slot
+ 10976 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10978 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_Fint32_to_float32_80
+.return_address
+ 10992 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10996 0x10 0x00 0x01 0x18 MOVX r0, #0
+.delay_slot
+.swstall delay_slot
+ 11000 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11002 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11004 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11006 0x00 0x00 NOPX
+.label TGT_Fint32_to_float32_96
+ 11008 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11012 0xcf 0x00 0x00 0x20 0x00 0x44 MOVXM r0, #-822083584
+.delay_slot
+.swstall delay_slot
+ 11018 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11022 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11024 0x00 0x00 NOPX
+.label int32_to_float32__end
+
+.text_segment PM 11040
+.label _ZL14addFloat32Sigsjji
+.function_start
+ 11040 0xfd 0x32 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r18, #-23; MOVXM r16, #8388607
+ 11050 0x10 0x63 0x2d 0x98 LSHL r17, r1, r18
+ 11054 0x10 0x89 0x2d 0x98 LSHL r4, r2, r18
+ 11058 0x14 0x76 0x90 0x18 EXTEND.u8 r27, r17
+ 11062 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4
+ 11066 0xdc 0x72 0x3c 0x20 0x01 0x64 SUB r17, r27, r25; MOV r24, #0
+ 11072 0x16 0x09 0x1a 0x98 LT r4, r24, r17
+ 11076 0x20 0x15 0xf8 0x40 0x01 0x84 JNZ r4, #11248
+.delay_slot
+ 11082 0x10 0x67 0x04 0x98 AND r19, r1, r16
+.delay_slot
+ 11086 0x14 0x20 0x90 0x20 0x19 0x64 AND r16, r2, r16; MOV r0, #6
+.delay_slot
+ 11092 0x14 0xe6 0x0d 0x98 LSHL r19, r19, r0
+.delay_slot
+ 11096 0x84 0x01 0xba 0x23 0xfd 0x64 LSHL r16, r16, r0; MOV r20, #255
+.delay_slot
+ 11102 0xd8 0x28 0xf9 0x20 0x7d 0x64 EQ r0, r27, r20; MOV r18, #31
+ 11108 0x14 0x4b 0x89 0x98 GE r5, r17, r24
+ 11112 0x28 0x16 0x58 0x40 0x01 0x84 JNZ r5, #11440
+.delay_slot
+ 11118 0x10 0xc9 0x2d 0x98 LSHL r4, r3, r18
+.delay_slot
+.swstall delay_slot
+ 11122 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11124 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11126 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11128 0x00 0x00 NOPX
+ 11130 0x16 0x69 0x47 0x98 EQ r20, r25, r20
+ 11134 0xa0 0x16 0x40 0x40 0x01 0x84 JNZ r20, #11392
+.delay_slot
+.swstall delay_slot
+ 11140 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11142 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11144 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11146 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11148 0x00 0x00 NOPX
+ 11150 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11154 0x18 0x9c 0xa0 0xf8 MOV r2, r25
+ 11158 0x14 0x40 0x07 0x18 ADD r0, r17, #1
+ 11162 0x10 0x23 0x12 0x18 SEL.EQZ r17, r0, r17, r27
+ 11166 0x16 0x23 0x11 0x98 SUB r17, r24, r17
+ 11170 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344
+.delay_slot
+ 11176 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912
+.delay_slot
+ 11182 0x14 0xc7 0x45 0x98 OR r3, r19, r20
+.delay_slot
+ 11186 0x14 0xe6 0x32 0x18 SEL.EQZ r19, r19, r3, r27
+.delay_slot
+.swstall delay_slot
+ 11190 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11192 0x00 0x00 NOPX
+ 11194 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32
+ 11200 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11204 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18
+ 11208 0x14 0x76 0x0a 0x98 LT r27, r17, r0
+ 11212 0x00 0x16 0x28 0x00 0x00 0x84 J #11344
+.delay_slot
+ 11218 0x14 0xc6 0x3d 0x98 LSHL r3, r19, r3
+.delay_slot
+ 11222 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+.delay_slot
+ 11226 0x14 0xe2 0xf0 0x18 NEZ r17, r19
+.delay_slot
+ 11230 0x10 0xe5 0x25 0x98 OR r18, r3, r18
+.delay_slot
+ 11234 0x00 0x2c 0xf0 0x00 0x24 0x67 0x22 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM
+.label TGT_F_ZL14addFloat32Sigsjji_208
+ 11248 0x00 0x16 0x78 0x40 0x01 0x84 JNZ r0, #11504
+.delay_slot
+ 11254 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912
+.delay_slot
+.swstall delay_slot
+ 11260 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11262 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11264 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11266 0x00 0x00 NOPX
+ 11268 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11272 0x88 0xff 0xe1 0x3b 0x41 0xe4 ADD r3, r17, #-1; MOV r2, r27
+ 11278 0x1e 0xdc 0xa0 0xf8 MOV r27, r25
+ 11282 0x10 0xe3 0x12 0x18 SEL.EQZ r17, r3, r17, r27
+ 11286 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344
+.delay_slot
+ 11292 0x15 0x01 0x05 0x98 OR r0, r20, r16
+.delay_slot
+ 11296 0x14 0x20 0x02 0x18 SEL.EQZ r16, r16, r0, r27
+.delay_slot
+.swstall delay_slot
+ 11300 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11302 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11304 0x00 0x00 NOPX
+ 11306 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32
+ 11312 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11316 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18
+ 11320 0x14 0x06 0x3d 0x98 LSHL r3, r16, r3
+ 11324 0x14 0x76 0x0a 0x98 LT r27, r17, r0
+ 11328 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+ 11332 0x14 0x20 0xf0 0x18 NEZ r16, r16
+ 11336 0x10 0xe3 0x25 0x98 OR r17, r3, r18
+ 11340 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27
+.label TGT_F_ZL14addFloat32Sigsjji_304
+ 11344 0x00 0x32 0x00 0x27 0x3a 0x2e 0x28 0xbf 0xc8 0xba MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1
+ 11354 0x9c 0xe0 0x18 0x31 0x01 0x24 ADD r19, r19, r16; ADD.NC r16, r17, #1
+ 11360 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18
+ 11364 0x14 0xb7 0x8a 0x98 LT r27, r18, r24
+ 11368 0x14 0x45 0x02 0x18 SEL.EQZ r2, r17, r16, r27
+ 11372 0x14 0x87 0x32 0x18 SEL.EQZ r3, r18, r19, r27
+.label __ll1__ZL14addFloat32Sigsjji
+.tail_call
+ 11376 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+.swstall delay_slot
+ 11382 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11384 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11386 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11388 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11390 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.return_address
+ 11392 0x80 0x16 0x88 0x40 0x01 0x84 JNZ r16, #11536
+.delay_slot
+.swstall delay_slot
+ 11398 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11400 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11406 0x00 0x00 NOPX
+ 11408 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11412 0x7f 0x80 0x08 0x20 0x00 0x44 MOVXM r16, #2139095040
+.delay_slot
+ 11418 0x11 0x01 0x00 0x98 ADD r0, r4, r16
+.delay_slot
+.swstall delay_slot
+ 11422 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14addFloat32Sigsjji_400
+ 11440 0x00 0x16 0x90 0x40 0x01 0x84 JNZ r0, #11552
+.delay_slot
+.swstall delay_slot
+ 11446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11450 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11452 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11454 0x00 0x00 NOPX
+ 11456 0xd8 0x16 0xa8 0x00 0x01 0x84 JZ r27, #11600
+.delay_slot
+.swstall delay_slot
+ 11462 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11468 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11470 0x00 0x00 NOPX
+ 11472 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11476 0x00 0x16 0x38 0x00 0x00 0x84 J #11376
+.delay_slot
+ 11482 0x40 0x00 0x08 0xa0 0x00 0x44 MOVXM r17, #1073741824
+.delay_slot
+ 11488 0x9c 0x62 0x11 0x3b 0x41 0xe4 ADD r17, r19, r17; MOV r2, r27
+.delay_slot
+ 11494 0x14 0x47 0x00 0x98 ADD r3, r17, r16
+.delay_slot
+.swstall delay_slot
+ 11498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11500 0x00 0x01 0x67 0x98 NOPA
+.label TGT_F_ZL14addFloat32Sigsjji_464
+ 11504 0x98 0x16 0xb8 0x40 0x01 0x84 JNZ r19, #11632
+.delay_slot
+.swstall delay_slot
+ 11510 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11512 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11514 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11516 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11518 0x00 0x00 NOPX
+ 11520 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11524 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 11528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11534 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.tail_call
+ 11536 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11542 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11544 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11548 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11550 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.return_address
+ 11552 0x14 0xe1 0x05 0x98 OR r16, r19, r16
+ 11556 0x80 0x16 0xc0 0x40 0x01 0x84 JNZ r16, #11648
+.delay_slot
+.swstall delay_slot
+ 11562 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11564 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11566 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11568 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11570 0x00 0x00 NOPX
+ 11572 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 11576 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 11580 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11582 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11584 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11586 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14addFloat32Sigsjji_560
+ 11600 0x05 0x00 0x08 0x33 0x82 0xa4 RET lr; ADD.NC r16, r19, r16
+.delay_slot
+ 11606 0x17 0xe2 0xe9 0x18 MOVX r17, #-6
+.delay_slot
+ 11610 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17
+.delay_slot
+ 11614 0x11 0x01 0x00 0x98 ADD r0, r4, r16
+.delay_slot
+.swstall delay_slot
+ 11618 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.tail_call
+ 11632 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11638 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11640 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11642 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11646 0x00 0x00 NOPX
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.tail_call
+.return_address
+ 11648 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 11654 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11656 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11658 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11660 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11662 0x00 0x00 NOPX
+.label _ZL14addFloat32Sigsjji__end
+.label _ZL14subFloat32Sigsjji
+.function_start
+ 11664 0xfd 0x31 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r17, #-23; MOVXM r16, #8388607
+ 11674 0x10 0x89 0x1d 0x98 LSHL r4, r2, r17
+ 11678 0x10 0x65 0x1d 0x98 LSHL r18, r1, r17
+ 11682 0x10 0x69 0x04 0x98 AND r20, r1, r16
+ 11686 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4
+ 11690 0x14 0xb6 0x90 0x18 EXTEND.u8 r27, r18
+ 11694 0x14 0x20 0x99 0xa0 0x1d 0x64 AND r16, r2, r16; MOV r19, #7
+ 11700 0x15 0x23 0x3d 0x98 LSHL r17, r20, r19
+ 11704 0xdc 0xb2 0x3c 0x20 0x01 0x64 SUB r18, r27, r25; MOV r24, #0
+ 11710 0x16 0x0b 0x2a 0x98 LT r5, r24, r18
+ 11714 0x28 0x17 0x40 0x40 0x01 0x84 JNZ r5, #11904
+.delay_slot
+ 11720 0x14 0x21 0x3d 0x98 LSHL r16, r16, r19
+.delay_slot
+ 11724 0x1f 0xe0 0x00 0x10 0x00 0x00 0x88 0x00 0x10 0xba MOVA r0, #255; MOVXM r4, #1073741824
+.delay_slot
+ 11734 0x16 0xe8 0x07 0x98 EQ r20, r27, r0
+.delay_slot
+ 11738 0x14 0x66 0x45 0x98 OR r19, r17, r4
+.delay_slot
+ 11742 0x11 0x09 0x05 0x98 OR r4, r4, r16
+ 11746 0x14 0x8d 0x89 0x98 GE r6, r18, r24
+ 11750 0x30 0x17 0x90 0x40 0x01 0x84 JNZ r6, #12064
+.delay_slot
+ 11756 0x10 0x0a 0x05 0x18 MOVX r5, #1
+.delay_slot
+ 11760 0x10 0xce 0x56 0x98 XOR r7, r3, r5
+.delay_slot
+.swstall delay_slot
+ 11764 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11766 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11768 0x00 0x00 NOPX
+ 11770 0x16 0x68 0x07 0x98 EQ r20, r25, r0
+ 11774 0xa0 0x17 0xc8 0x40 0x01 0x84 JNZ r20, #12176
+.delay_slot
+.swstall delay_slot
+ 11780 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11782 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11788 0x00 0x00 NOPX
+ 11790 0x18 0x53 0xa0 0xf8 MOV r1, r7
+ 11794 0x14 0xa0 0x07 0x18 ADD r16, r18, #1
+ 11798 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27
+ 11802 0x16 0x21 0x01 0x98 SUB r16, r24, r16
+ 11806 0x80 0x17 0x30 0x00 0x01 0x84 JZ r16, #11872
+.delay_slot
+ 11812 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27
+.delay_slot
+.swstall delay_slot
+ 11816 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11818 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11820 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11822 0x00 0x00 NOPX
+ 11824 0x04 0x14 0x00 0x30 0x38 0x0e 0x48 0x1f 0x58 0xba MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31
+ 11834 0x10 0xe5 0x24 0x98 AND r18, r3, r18
+ 11838 0x14 0x65 0x2d 0x98 LSHL r18, r17, r18
+ 11842 0x14 0x66 0xf0 0x18 NEZ r19, r17
+ 11846 0x14 0x37 0x4a 0x98 LT r27, r16, r20
+ 11850 0x14 0x62 0x3d 0x98 LSHL r17, r17, r3
+ 11854 0x14 0xa4 0xf0 0x18 NEZ r18, r18
+ 11858 0x14 0x61 0x25 0x98 OR r16, r17, r18
+ 11862 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0xe3 0x02 0x7a NOPA; NOPS; SEL.EQZ r17, r19, r16, r27
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 0x00 0x17 0x80 0x00 0x00 0x84 J #12032
+.delay_slot
+ 11878 0x11 0x07 0x11 0x98 SUB r3, r4, r17
+.delay_slot
+.swstall delay_slot
+ 11882 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11884 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11886 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_240
+ 11904 0xa0 0x17 0xe0 0x40 0x01 0x84 JNZ r20, #12224
+.delay_slot
+.swstall delay_slot
+ 11910 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11912 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11914 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11916 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11918 0x00 0x00 NOPX
+ 11920 0x18 0x1d 0xa0 0xf8 MOV r0, r27
+ 11924 0x18 0x51 0xa0 0xf8 MOV r1, r3
+ 11928 0x1e 0xdc 0xa0 0xf8 MOV r27, r25
+ 11932 0x1e 0x50 0x20 0xf8 MOV r25, r0
+ 11936 0x14 0xa3 0xff 0x18 ADD r17, r18, #-1
+ 11940 0x14 0x63 0x22 0x18 SEL.EQZ r17, r17, r18, r27
+ 11944 0x88 0x17 0x78 0x00 0x01 0x84 JZ r17, #12016
+.delay_slot
+ 11950 0x14 0x20 0x42 0x18 SEL.EQZ r16, r16, r4, r27
+.delay_slot
+.swstall delay_slot
+ 11954 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11956 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11958 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11960 0x00 0x00 NOPX
+ 11962 0x04 0x03 0x00 0x31 0x28 0x8e 0x88 0x1f 0x58 0xba MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31
+ 11972 0x14 0xa9 0x44 0x98 AND r20, r18, r20
+ 11976 0x14 0x29 0x4d 0x98 LSHL r20, r16, r20
+ 11980 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18
+ 11984 0x14 0x76 0x3a 0x98 LT r27, r17, r3
+ 11988 0x15 0x28 0xf0 0x18 NEZ r20, r20
+ 11992 0x14 0x20 0xf0 0x18 NEZ r16, r16
+ 11996 0x14 0xa3 0x45 0x98 OR r17, r18, r20
+ 12000 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV
+.label __ll1__ZL14subFloat32Sigsjji
+ 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x26 0x38 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.tail_call
+ 12032 0x00 0x15 0x40 0x00 0x00 0x84 J #10880
+.delay_slot
+ 12038 0x16 0x45 0xff 0x18 ADD r2, r25, #-1
+.delay_slot
+.swstall delay_slot
+ 12042 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12044 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12046 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.return_address
+ 12064 0xa0 0x17 0xf0 0x40 0x01 0x84 JNZ r20, #12256
+.delay_slot
+.swstall delay_slot
+ 12070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12072 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12074 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12076 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12078 0x00 0x00 NOPX
+ 12080 0x14 0x27 0x1c 0x98 LTU r19, r16, r17
+ 12084 0x98 0x18 0x08 0x40 0x01 0x84 JNZ r19, #12304
+.delay_slot
+ 12090 0x11 0x71 0x92 0x18 SEL.EQZ r24, r5, r25, r27
+.delay_slot
+ 12094 0x1c 0x9d 0xa0 0xf8 MOV r18, r27
+.delay_slot
+ 12098 0x11 0x73 0x22 0x18 SEL.EQZ r25, r5, r18, r27
+.delay_slot
+.swstall delay_slot
+ 12102 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12104 0x00 0x00 NOPX
+ 12106 0x14 0x65 0x0c 0x98 LTU r18, r17, r16
+ 12110 0x90 0x18 0x18 0x40 0x01 0x84 JNZ r18, #12336
+.delay_slot
+.swstall delay_slot
+ 12116 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12118 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12120 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12122 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12124 0x00 0x00 NOPX
+ 12126 0x10 0x20 0x7d 0x18 MOVX r16, #31
+ 12130 0x00 0x07 0xc0 0xc9 0xe8 0x44 MOVXM p0, #509172
+ 12136 0x00 0x06 0x56 0x98 LDA r18, [p0]
+.swstall __RAW__R_1948
+ 12140 0x00 0x00 NOPX
+.swstall __RAW__R_1948
+ 12142 0x00 0x00 NOPX
+ 12144 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12148 0x10 0x22 0x0d 0x18 MOVX r17, #3
+.delay_slot
+.swstall delay_slot
+ 12152 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12154 0x00 0x00 NOPX
+.delay_slot
+ 12156 0x14 0x63 0x27 0x98 EQ r17, r17, r18
+.delay_slot
+ 12160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x08 0x6c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_512
+ 12176 0x80 0x18 0x28 0x40 0x01 0x84 JNZ r16, #12368
+.delay_slot
+.swstall delay_slot
+ 12182 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12184 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12186 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12188 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12190 0x00 0x00 NOPX
+ 12192 0x10 0x20 0x7d 0x18 MOVX r16, #31
+ 12196 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12200 0x11 0xe1 0x0d 0x98 LSHL r16, r7, r16
+.delay_slot
+ 12204 0x7f 0x80 0x08 0xa0 0x00 0x44 MOVXM r17, #2139095040
+.delay_slot
+ 12210 0x14 0x41 0x00 0x98 ADD r0, r17, r16
+.delay_slot
+.swstall delay_slot
+ 12214 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12216 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_560
+ 12224 0x88 0x18 0x30 0x40 0x01 0x84 JNZ r17, #12384
+.delay_slot
+.swstall delay_slot
+ 12230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12232 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12234 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12236 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12238 0x00 0x00 NOPX
+ 12240 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12244 0x18 0x10 0xa0 0xf8 MOV r0, r1
+.delay_slot
+.swstall delay_slot
+ 12248 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12250 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12252 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12254 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_592
+ 12256 0x14 0x61 0x05 0x98 OR r16, r17, r16
+ 12260 0x80 0x18 0x38 0x40 0x01 0x84 JNZ r16, #12400
+.delay_slot
+.swstall delay_slot
+ 12266 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12268 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12270 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12272 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12274 0x00 0x00 NOPX
+ 12276 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 12280 0x7f 0xff 0xf0 0x3f 0xfe 0x44 MOVXM r0, #2147483647
+.delay_slot
+.swstall delay_slot
+ 12286 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12288 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 0x00 0x17 0x78 0x00 0x00 0x84 J #12016
+.delay_slot
+ 12310 0x18 0x51 0xa0 0xf8 MOV r1, r3
+.delay_slot
+ 12314 0x1c 0xd8 0xa0 0xf8 MOV r19, r17
+.delay_slot
+.swstall delay_slot
+ 12318 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12320 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12322 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 0x00 0x17 0x30 0x00 0x00 0x84 J #11872
+.delay_slot
+ 12342 0x19 0x18 0x20 0xf8 MOV r4, r16
+.delay_slot
+ 12346 0x1e 0x5c 0x20 0xf8 MOV r25, r24
+.delay_slot
+ 12350 0x18 0x53 0xa0 0xf8 MOV r1, r7
+.delay_slot
+.swstall delay_slot
+ 12354 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12356 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.tail_call
+ 12368 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12374 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12376 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12378 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12380 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12382 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.tail_call
+.return_address
+ 12384 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12390 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12392 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12394 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12396 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12398 0x00 0x00 NOPX
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.tail_call
+.return_address
+ 12400 0x00 0x14 0x40 0x00 0x00 0x84 J #10368
+.delay_slot
+.swstall delay_slot
+ 12406 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12410 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12414 0x00 0x00 NOPX
+.label _ZL14subFloat32Sigsjji__end
+.label float32_add
+.function_start
+ 12416 0x17 0xe0 0x85 0x18 MOVX r16, #-31
+ 12420 0x10 0x47 0x0d 0x98 LSHL r3, r1, r16
+ 12424 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16
+ 12428 0x10 0xe1 0x07 0x98 EQ r16, r3, r16
+ 12432 0x80 0x18 0x58 0x40 0x01 0x84 JNZ r16, #12464
+.delay_slot
+.swstall delay_slot
+ 12438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12444 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12446 0x00 0x00 NOPX
+.tail_call
+ 12448 0x00 0x16 0xc8 0x00 0x00 0x84 J #11664
+.delay_slot
+.swstall delay_slot
+ 12454 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12456 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12458 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12460 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12462 0x00 0x00 NOPX
+.label TGT_Ffloat32_add_48
+.tail_call
+.return_address
+ 12464 0x00 0x15 0x90 0x00 0x00 0x84 J #11040
+.delay_slot
+.swstall delay_slot
+ 12470 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12472 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12474 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12476 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12478 0x00 0x00 NOPX
+.label float32_add__end
+
+.data_segment DMb 508992
+.label reduce_mean_c8_params
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x7
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509120 40
+
+.data_segment DMb 509160
+.label _ZL8num_iter
+ 0x1
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509164 4
+
+.bss_segment DMb 509168 1
+
+.bss_segment DMb 509172 4
+
+.bss_segment DMb 509184 64
+
+.stack DM_stack 506560 508928
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.map
new file mode 100644
index 0000000000000000000000000000000000000000..a0123fcd2abb0ee7d6fe767c4cfeb9204c35f584
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.map
@@ -0,0 +1,177 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+Memory map for memory 'DM_stack':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2368
+
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+
+Memory map for memory 'DMb':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2613
+
+ 0x00000000..0x0007babf ( 506560 items) : Reserved
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+ 0x0007c400..0x0007c43f ( 64 items) : Reserved
+ 0x0007c440..0x0007c4bf ( 128 items) : ../Release/0_0_reloadable2.o::reduce_mean_c8_params (Data, Global, .data.DMb.64)
+ 0x0007c4c0..0x0007c4c3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9curr_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4c4..0x0007c4c7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4c8..0x0007c4cb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8core_row (Data, Local, .bss.DMb.4)
+ 0x0007c4cc..0x0007c4cf ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4)
+ 0x0007c4d0..0x0007c4d3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11reduce_axis (Data, Local, .bss.DMb.4)
+ 0x0007c4d4..0x0007c4d7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_width (Data, Local, .bss.DMb.4)
+ 0x0007c4d8..0x0007c4db ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9l3_height (Data, Local, .bss.DMb.4)
+ 0x0007c4dc..0x0007c4df ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_depth (Data, Local, .bss.DMb.4)
+ 0x0007c4e0..0x0007c4e3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10width_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4e4..0x0007c4e7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11height_iter (Data, Local, .bss.DMb.4)
+ 0x0007c4e8..0x0007c4eb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8num_iter (Data, Local, .data.DMb.4)
+ 0x0007c4ec..0x0007c4ef ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4)
+ 0x0007c4f0..0x0007c4f0 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1)
+ 0x0007c4f4..0x0007c4f7 ( 4 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float_rounding_mode (Data, Global, .bss.DMb.4)
+ 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable2.o::pad_3d_params (Data, Global, .bss.DMb.64)
+ 0x0007ccc0..0x000fffff ( 537408 items) : Reserved
+
+Memory map for memory 'PM':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 10058
+
+ 0x00000000..0x0000092f ( 2352 items) : Reserved
+ 0x00000930..0x00000a0b ( 220 items) : ../Release/0_0_reloadable2.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00000a10..0x00001043 ( 1588 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+ 0x00001050..0x000012ed ( 670 items) : ../Release/0_0_reloadable2.o::_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t (Function, Weak, .text) (stack frame size = 0)
+ 0x000012f0..0x00001d67 ( 2680 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E (Function, Weak, .text) (stack frame size = 256)
+
+ Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001d70..0x000027e3 ( 2676 items) : ../Release/0_0_reloadable2.o::_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128)
+
+ Called functions : _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ int32_to_float32
+ float32_add
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+ _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_satE
+ _ZN12me_primitive11control_rndE
+ reduce_mean_c8_params
+ _ZL11reduce_axis
+ _ZL11ifm1_offset
+ pad_3d_params
+ _ZL8num_iter
+ _ZL8l3_width
+ _ZL9l3_height
+ _ZL8l3_depth
+ _ZL10depth_iter
+ _ZL10width_iter
+ _ZL11height_iter
+
+ 0x000027f0..0x0000287d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0)
+ 0x00002880..0x000028ef ( 112 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19propagateFloat32NaNjj (Function, Local, .text) (stack frame size = 0)
+ 0x000028f0..0x00002a77 ( 392 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19roundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0)
+
+ Referenced symbols: float_rounding_mode
+
+ 0x00002a80..0x00002a97 ( 24 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL28normalizeRoundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL19roundAndPackFloat32iij
+
+ 0x00002aa0..0x00002b11 ( 114 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::int32_to_float32 (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _ZL28normalizeRoundAndPackFloat32iij
+
+ 0x00002b20..0x00002d8f ( 624 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14addFloat32Sigsjji (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL19roundAndPackFloat32iij
+ _ZL19propagateFloat32NaNjj
+
+ 0x00002d90..0x0000307f ( 752 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14subFloat32Sigsjji (Function, Local, .text) (stack frame size = 0)
+
+ Called functions : _ZL28normalizeRoundAndPackFloat32iij
+ _ZL19propagateFloat32NaNjj
+
+ Referenced symbols: float_rounding_mode
+
+ 0x00003080..0x000030bf ( 64 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float32_add (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _ZL14subFloat32Sigsjji
+ _ZL14addFloat32Sigsjji
+
+
+External symbols:
+
+ __dso_handle = 0x0
+ _ctors_end = 0x0
+ _ctors_start = 0x0
+ _dtors_end = 0x0
+ _dtors_start = 0x0
+ _pc_end = 0x30c0
+ _pc_start = 0x930
+ _sp_end_DM_stack = 0x7c400
+ _sp_start_DM_stack = 0x7bac0
+
+Section summary for memory 'DM_stack':
+
+ .stack File
+ ---------- ----------
+ 2368
+ ---------- ----------
+ 2368 Total
+
+Section summary for memory 'DMb':
+
+ .bss .data File
+ ---------- ---------- ----------
+ 104 132 ../Release/0_0_reloadable2.o
+ 4 0 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ 5 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ---------- ----------
+ 113 132 Total
+
+Section summary for memory 'PM':
+
+ .text File
+ ---------- ----------
+ 7834 ../Release/0_0_reloadable2.o
+ 2082 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ----------
+ 10058 Total
+
+File summary:
+
+../Release/0_0_reloadable2.o
+ DMb 236
+ PM 7834
+
+me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ DMb 5
+
+softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)
+ DMb 4
+ PM 2082
+
+me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ PM 142
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.sdr
new file mode 100644
index 0000000000000000000000000000000000000000..efa1bd1f1f0feebb4e1aac96628ff9f168810f9e
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.sdr
@@ -0,0 +1,96 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+// Symbols in memory 'DM_bankA':
+// Symbols in memory 'DM_bankAB':
+// Symbols in memory 'DM_bankAC':
+// Symbols in memory 'DM_bankAD':
+// Symbols in memory 'DM_bankB':
+// Symbols in memory 'DM_bankBC':
+// Symbols in memory 'DM_bankBD':
+// Symbols in memory 'DM_bankC':
+// Symbols in memory 'DM_bankCD':
+// Symbols in memory 'DM_bankD':
+// Symbols in memory 'DM_stack':
+// Symbols in memory 'DM_test':
+// Symbols in memory 'DMb':
+_symbol reduce_mean_c8_params 0x0007c440
+_symbol _ZN12me_primitive11control_satE 0x0007c4ec
+_symbol _ZN12me_primitive11control_rndE 0x0007c4f0
+_symbol float_rounding_mode 0x0007c4f4
+_symbol pad_3d_params 0x0007c500
+// Symbols in memory 'DMh':
+// Symbols in memory 'DMh_bankA':
+// Symbols in memory 'DMh_bankAB':
+// Symbols in memory 'DMh_bankAC':
+// Symbols in memory 'DMh_bankAD':
+// Symbols in memory 'DMh_bankB':
+// Symbols in memory 'DMh_bankBC':
+// Symbols in memory 'DMh_bankBD':
+// Symbols in memory 'DMh_bankC':
+// Symbols in memory 'DMh_bankCD':
+// Symbols in memory 'DMh_bankD':
+// Symbols in memory 'DMh_stack':
+// Symbols in memory 'DMs':
+// Symbols in memory 'DMs_bankA':
+// Symbols in memory 'DMs_bankAB':
+// Symbols in memory 'DMs_bankAC':
+// Symbols in memory 'DMs_bankAD':
+// Symbols in memory 'DMs_bankB':
+// Symbols in memory 'DMs_bankBC':
+// Symbols in memory 'DMs_bankBD':
+// Symbols in memory 'DMs_bankC':
+// Symbols in memory 'DMs_bankCD':
+// Symbols in memory 'DMs_bankD':
+// Symbols in memory 'DMs_stack':
+// Symbols in memory 'DMv':
+// Symbols in memory 'DMv_bankA':
+// Symbols in memory 'DMv_bankAB':
+// Symbols in memory 'DMv_bankAC':
+// Symbols in memory 'DMv_bankAD':
+// Symbols in memory 'DMv_bankB':
+// Symbols in memory 'DMv_bankBC':
+// Symbols in memory 'DMv_bankBD':
+// Symbols in memory 'DMv_bankC':
+// Symbols in memory 'DMv_bankCD':
+// Symbols in memory 'DMv_bankD':
+// Symbols in memory 'DMv_stack':
+// Symbols in memory 'DMw':
+// Symbols in memory 'DMw_bankA':
+// Symbols in memory 'DMw_bankAB':
+// Symbols in memory 'DMw_bankAC':
+// Symbols in memory 'DMw_bankAD':
+// Symbols in memory 'DMw_bankB':
+// Symbols in memory 'DMw_bankBC':
+// Symbols in memory 'DMw_bankBD':
+// Symbols in memory 'DMw_bankC':
+// Symbols in memory 'DMw_bankCD':
+// Symbols in memory 'DMw_bankD':
+// Symbols in memory 'DMw_stack':
+// Symbols in memory 'DMx':
+// Symbols in memory 'DMx_bankA':
+// Symbols in memory 'DMx_bankAB':
+// Symbols in memory 'DMx_bankAC':
+// Symbols in memory 'DMx_bankAD':
+// Symbols in memory 'DMx_bankB':
+// Symbols in memory 'DMx_bankBC':
+// Symbols in memory 'DMx_bankBD':
+// Symbols in memory 'DMx_bankC':
+// Symbols in memory 'DMx_bankCD':
+// Symbols in memory 'DMx_bankD':
+// Symbols in memory 'DMx_stack':
+// Symbols in memory 'PM':
+_symbol _Z13kernelWrapperPPvjjjj 0x00000930
+_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv 0x00000a10
+_symbol _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t 0x00001050
+_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E 0x000012f0
+_symbol _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001d70
+_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000027f0
+_symbol int32_to_float32 0x00002aa0
+_symbol float32_add 0x00003080
+// Symbols in memory 'PMw':
+// Symbols in memory 'TM4':
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.srv
new file mode 100644
index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.srv
@@ -0,0 +1,14427 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable2.cc" 29 first
+.src_ref 0 "0_0_reloadable2.cc" 31 60 first
+.function_start
+ 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00101111" // /* MW 4 */
+ 2355 "11010000" // /* MW 3 */
+ 2356 "11000010" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 29
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 31 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "01010000" // /* MW 6 */
+ 2367 "11101000" // /* MW 5 */
+ 2368 "00000001" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "01110011" // /* MW 2 */
+ 2371 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79
+.src_ref 0 "0_0_reloadable2.cc" 31 110 first
+ 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2373 "01111001" // /* MW 9 */
+ 2374 "01100000" // /* MW 8 */
+ 2375 "10110000" // /* MW 7 */
+ 2376 "10000011" // /* MW 6 */
+ 2377 "10100111" // /* MW 5 */
+ 2378 "00011111" // /* MW 4 */
+ 2379 "10110000" // /* MW 3 */
+ 2380 "10000010" // /* MW 2 */
+ 2381 "11111111" // /* MW 1 */
+ 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2383 "00111101" // /* MW 3 */
+ 2384 "11110100" // /* MW 2 */
+ 2385 "00001111" // /* MW 1 */
+ 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2387 "00000000" // /* MW 1 */
+ 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2389 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2391 "00000010" // /* MW 3 */
+ 2392 "01101000" // /* MW 2 */
+ 2393 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2395 "00010110" // /* MW 3 */
+ 2396 "00011110" // /* MW 2 */
+ 2397 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2399 "01010110" // /* MW 3 */
+ 2400 "00111110" // /* MW 2 */
+ 2401 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2403 "00110110" // /* MW 3 */
+ 2404 "11101110" // /* MW 2 */
+ 2405 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2407 "01110110" // /* MW 3 */
+ 2408 "00000111" // /* MW 2 */
+ 2409 "00000000" // /* MW 1 */
+ 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2411 "00000000" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2423 "00100010" // /* MW 3 */
+ 2424 "00100001" // /* MW 2 */
+ 2425 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2427 "00010001" // /* MW 3 */
+ 2428 "11010110" // /* MW 2 */
+ 2429 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2431 "11111101" // /* MW 3 */
+ 2432 "11100000" // /* MW 2 */
+ 2433 "00010111" // /* MW 1 */
+ 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2435 "00000000" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2441 "00001000" // /* MW 3 */
+ 2442 "01010111" // /* MW 2 */
+ 2443 "00010100" // /* MW 1 */
+ 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2445 "00000000" // /* MW 1 */
+ 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2447 "00000000" // /* MW 1 */
+ 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2449 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 17 79 first
+ 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00011110" // /* MW 3 */
+ 2452 "00101100" // /* MW 2 */
+ 2453 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 18 47 first
+ 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "10011110" // /* MW 3 */
+ 2456 "11111100" // /* MW 2 */
+ 2457 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 19 81 first
+ 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2459 "00011110" // /* MW 3 */
+ 2460 "00000101" // /* MW 2 */
+ 2461 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 16 4 first
+.no_stack_arguments
+ 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */
+ 2463 "00000001" // /* MW 5 */
+ 2464 "00000000" // /* MW 4 */
+ 2465 "10111000" // /* MW 3 */
+ 2466 "00001110" // /* MW 2 */
+ 2467 "00000000" // /* MW 1 */
+.delay_slot
+ 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2469 "01010101" // /* MW 3 */
+ 2470 "11110011" // /* MW 2 */
+ 2471 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2479 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 38 60 first
+.return_address
+ 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2481 "00010110" // /* MW 3 */
+ 2482 "11110110" // /* MW 2 */
+ 2483 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2485 "01010001" // /* MW 3 */
+ 2486 "11110011" // /* MW 2 */
+ 2487 "00000111" // /* MW 1 */
+ 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2489 "00000000" // /* MW 1 */
+ 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2491 "00000000" // /* MW 1 */
+ 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2493 "00000000" // /* MW 1 */
+ 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2495 "00000000" // /* MW 1 */
+ 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2497 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2499 "00001000" // /* MW 3 */
+ 2500 "01101000" // /* MW 2 */
+ 2501 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2503 "00010110" // /* MW 3 */
+ 2504 "00000110" // /* MW 2 */
+ 2505 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2507 "00000101" // /* MW 3 */
+ 2508 "00100010" // /* MW 2 */
+ 2509 "00010000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+ 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2517 "00000000" // /* MW 1 */
+ 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2519 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "00011000" // /* MW 3 */
+ 2522 "00010101" // /* MW 2 */
+ 2523 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2525 "01000001" // /* MW 5 */
+ 2526 "10101111" // /* MW 4 */
+ 2527 "00101101" // /* MW 3 */
+ 2528 "10000111" // /* MW 2 */
+ 2529 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2531 "00010110" // /* MW 3 */
+ 2532 "11110110" // /* MW 2 */
+ 2533 "00000000" // /* MW 1 */
+ 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2535 "10011001" // /* MW 3 */
+ 2536 "11111011" // /* MW 2 */
+ 2537 "00000111" // /* MW 1 */
+ 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2539 "00000000" // /* MW 1 */
+ 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "11110001" // /* MW 3 */
+ 2542 "11111101" // /* MW 2 */
+ 2543 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41 first
+ 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000001" // /* MW 5 */
+ 2546 "00000000" // /* MW 4 */
+ 2547 "00000000" // /* MW 3 */
+ 2548 "11111000" // /* MW 2 */
+ 2549 "11111111" // /* MW 1 */
+ 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2551 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable2.cc" 41
+ 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2553 "00000000" // /* MW 3 */
+ 2554 "00101000" // /* MW 2 */
+ 2555 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2557 "00000001" // /* MW 3 */
+ 2558 "01100011" // /* MW 2 */
+ 2559 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2561 "00010010" // /* MW 3 */
+ 2562 "00100001" // /* MW 2 */
+ 2563 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2565 "00010001" // /* MW 3 */
+ 2566 "11110110" // /* MW 2 */
+ 2567 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2571 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 218 first
+.src_ref 2 "reduce_base_c8.h" 220 27 first
+.src_ref 2 "reduce_base_c8.h" 290 63
+.src_ref 2 "reduce_base_c8.h" 348 46
+.function_start
+ 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2577 "01111000" // /* MW 11 */
+ 2578 "01100000" // /* MW 10 */
+ 2579 "00001001" // /* MW 9 */
+ 2580 "01101000" // /* MW 8 */
+ 2581 "01100111" // /* MW 7 */
+ 2582 "00111110" // /* MW 6 */
+ 2583 "10001011" // /* MW 5 */
+ 2584 "10000000" // /* MW 4 */
+ 2585 "11010011" // /* MW 3 */
+ 2586 "10001110" // /* MW 2 */
+ 2587 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 287 40
+.src_ref 2 "reduce_base_c8.h" 348 46 first
+ 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2589 "00001000" // /* MW 9 */
+ 2590 "00000111" // /* MW 8 */
+ 2591 "00110000" // /* MW 7 */
+ 2592 "00001001" // /* MW 6 */
+ 2593 "00100101" // /* MW 5 */
+ 2594 "00111110" // /* MW 4 */
+ 2595 "00000000" // /* MW 3 */
+ 2596 "00000111" // /* MW 2 */
+ 2597 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 293 77
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 299 40
+.src_ref 2 "reduce_base_c8.h" 300 59
+.src_ref 2 "reduce_base_c8.h" 326 79
+ 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2599 "01111000" // /* MW 9 */
+ 2600 "01100000" // /* MW 8 */
+ 2601 "00001000" // /* MW 7 */
+ 2602 "10101000" // /* MW 6 */
+ 2603 "00010111" // /* MW 5 */
+ 2604 "00111110" // /* MW 4 */
+ 2605 "00000000" // /* MW 3 */
+ 2606 "01111110" // /* MW 2 */
+ 2607 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57
+.src_ref 2 "reduce_base_c8.h" 301 81
+.src_ref 2 "reduce_base_c8.h" 305 77
+ 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2609 "00010000" // /* MW 9 */
+ 2610 "11111100" // /* MW 8 */
+ 2611 "10001111" // /* MW 7 */
+ 2612 "00111100" // /* MW 6 */
+ 2613 "00000000" // /* MW 5 */
+ 2614 "00000000" // /* MW 4 */
+ 2615 "00000000" // /* MW 3 */
+ 2616 "11100101" // /* MW 2 */
+ 2617 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 218
+.src_ref 2 "reduce_base_c8.h" 280 76
+.src_ref 2 "reduce_base_c8.h" 312 98
+ 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2619 "01110000" // /* MW 9 */
+ 2620 "00000000" // /* MW 8 */
+ 2621 "00000000" // /* MW 7 */
+ 2622 "00000000" // /* MW 6 */
+ 2623 "00000010" // /* MW 5 */
+ 2624 "00000000" // /* MW 4 */
+ 2625 "00000000" // /* MW 3 */
+ 2626 "10010000" // /* MW 2 */
+ 2627 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+ 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2629 "00010111" // /* MW 3 */
+ 2630 "01100000" // /* MW 2 */
+ 2631 "00011100" // /* MW 1 */
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 220 25 first
+ 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "01110001" // /* MW 3 */
+ 2636 "00011100" // /* MW 2 */
+ 2637 "00001000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 28 first
+ 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "01010110" // /* MW 3 */
+ 2640 "00011111" // /* MW 2 */
+ 2641 "00000001" // /* MW 1 */
+ 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2643 "00000000" // /* MW 1 */
+ 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2645 "00000000" // /* MW 1 */
+ 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2647 "00000000" // /* MW 1 */
+ 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2649 "00000000" // /* MW 1 */
+ 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2651 "00000000" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 221 26
+.src_ref 2 "reduce_base_c8.h" 301 81 first
+ 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2655 "10001001" // /* MW 5 */
+ 2656 "01000100" // /* MW 4 */
+ 2657 "00111101" // /* MW 3 */
+ 2658 "11101010" // /* MW 2 */
+ 2659 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 26 first
+.src_ref 2 "reduce_base_c8.h" 293 58 first
+.src_ref 2 "reduce_base_c8.h" 301 81
+ 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2661 "10101000" // /* MW 9 */
+ 2662 "01001000" // /* MW 8 */
+ 2663 "11001100" // /* MW 7 */
+ 2664 "01111110" // /* MW 6 */
+ 2665 "01001101" // /* MW 5 */
+ 2666 "00000110" // /* MW 4 */
+ 2667 "11010000" // /* MW 3 */
+ 2668 "11110110" // /* MW 2 */
+ 2669 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 63 first
+ 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2671 "01101101" // /* MW 3 */
+ 2672 "10100100" // /* MW 2 */
+ 2673 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 77 first
+ 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2675 "00011101" // /* MW 3 */
+ 2676 "00001100" // /* MW 2 */
+ 2677 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 41 first
+.src_ref 2 "reduce_base_c8.h" 300 59 first
+ 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2679 "11111111" // /* MW 5 */
+ 2680 "10110010" // /* MW 4 */
+ 2681 "10110000" // /* MW 3 */
+ 2682 "01000011" // /* MW 2 */
+ 2683 "11010100" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 222 24 first
+.src_ref 2 "reduce_base_c8.h" 287 40 first
+ 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2691 "01001100" // /* MW 5 */
+ 2692 "10011100" // /* MW 4 */
+ 2693 "00111110" // /* MW 3 */
+ 2694 "11110110" // /* MW 2 */
+ 2695 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 29 first
+.src_ref 2 "reduce_base_c8.h" 312 60 first
+ 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2697 "10101111" // /* MW 9 */
+ 2698 "01001001" // /* MW 8 */
+ 2699 "00000111" // /* MW 7 */
+ 2700 "10000000" // /* MW 6 */
+ 2701 "10110101" // /* MW 5 */
+ 2702 "11111111" // /* MW 4 */
+ 2703 "11010111" // /* MW 3 */
+ 2704 "10001010" // /* MW 2 */
+ 2705 "00100011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 57 first
+ 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11011111" // /* MW 3 */
+ 2708 "11101001" // /* MW 2 */
+ 2709 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 78 first
+ 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2711 "01001111" // /* MW 3 */
+ 2712 "11111000" // /* MW 2 */
+ 2713 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 40 first
+ 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2715 "11101101" // /* MW 3 */
+ 2716 "01101011" // /* MW 2 */
+ 2717 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 57 first
+.src_ref 2 "reduce_base_c8.h" 299 40
+ 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2719 "11010000" // /* MW 5 */
+ 2720 "10110101" // /* MW 4 */
+ 2721 "10111101" // /* MW 3 */
+ 2722 "10001011" // /* MW 2 */
+ 2723 "11101100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 41
+ 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "11111111" // /* MW 3 */
+ 2726 "10101111" // /* MW 2 */
+ 2727 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 85 first
+ 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "01101111" // /* MW 3 */
+ 2730 "01111011" // /* MW 2 */
+ 2731 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 223 27 first
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2733 "01010101" // /* MW 5 */
+ 2734 "01100000" // /* MW 4 */
+ 2735 "00111111" // /* MW 3 */
+ 2736 "10001010" // /* MW 2 */
+ 2737 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 33 first
+ 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2739 "00101110" // /* MW 3 */
+ 2740 "00011100" // /* MW 2 */
+ 2741 "00000001" // /* MW 1 */
+ 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2743 "00000000" // /* MW 1 */
+ 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2745 "00000000" // /* MW 1 */
+ 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2747 "00000000" // /* MW 1 */
+ 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2749 "00000000" // /* MW 1 */
+ 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2751 "00000000" // /* MW 1 */
+ 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2753 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 224 31
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2755 "01110000" // /* MW 7 */
+ 2756 "00001110" // /* MW 6 */
+ 2757 "11110000" // /* MW 5 */
+ 2758 "00000011" // /* MW 4 */
+ 2759 "00110000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 34 first
+ 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2763 "00001110" // /* MW 3 */
+ 2764 "00000100" // /* MW 2 */
+ 2765 "00000001" // /* MW 1 */
+ 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2767 "00000000" // /* MW 1 */
+ 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2769 "00000000" // /* MW 1 */
+ 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2771 "00000000" // /* MW 1 */
+ 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2773 "00000000" // /* MW 1 */
+ 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2775 "00000000" // /* MW 1 */
+ 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2777 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 225 32
+.src_ref 2 "reduce_base_c8.h" 318 64
+ 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2779 "01110000" // /* MW 7 */
+ 2780 "10001110" // /* MW 6 */
+ 2781 "00110000" // /* MW 5 */
+ 2782 "00000011" // /* MW 4 */
+ 2783 "00110000" // /* MW 3 */
+ 2784 "10000001" // /* MW 2 */
+ 2785 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 32 first
+ 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2787 "11010110" // /* MW 3 */
+ 2788 "00010111" // /* MW 2 */
+ 2789 "00000001" // /* MW 1 */
+ 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2791 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */
+ 2793 "00000001" // /* MW 5 */
+ 2794 "01000000" // /* MW 4 */
+ 2795 "10110000" // /* MW 3 */
+ 2796 "00000101" // /* MW 2 */
+ 2797 "11000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 98 first
+.delay_slot
+ 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2799 "00001101" // /* MW 3 */
+ 2800 "00100111" // /* MW 2 */
+ 2801 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 318 64 first
+.delay_slot
+ 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2803 "10011111" // /* MW 3 */
+ 2804 "11110011" // /* MW 2 */
+ 2805 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 41 first
+.src_ref 2 "reduce_base_c8.h" 305 77 first
+.delay_slot
+ 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "11111111" // /* MW 5 */
+ 2808 "10110001" // /* MW 4 */
+ 2809 "10110010" // /* MW 3 */
+ 2810 "00001011" // /* MW 2 */
+ 2811 "10100101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 76 first
+.delay_slot
+ 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2813 "00001101" // /* MW 3 */
+ 2814 "00100001" // /* MW 2 */
+ 2815 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 226 30 first
+.src_ref 2 "reduce_base_c8.h" 318 88 first
+.delay_slot
+ 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2817 "11011111" // /* MW 5 */
+ 2818 "11111111" // /* MW 4 */
+ 2819 "00111100" // /* MW 3 */
+ 2820 "11111010" // /* MW 2 */
+ 2821 "00000010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00000101" // /* MW 3 */
+ 2824 "00111000" // /* MW 2 */
+ 2825 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "11000111" // /* MW 3 */
+ 2828 "10111001" // /* MW 2 */
+ 2829 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */
+ 2831 "00000001" // /* MW 5 */
+ 2832 "01000000" // /* MW 4 */
+ 2833 "11100000" // /* MW 3 */
+ 2834 "00000111" // /* MW 2 */
+ 2835 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2843 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 41 first
+.delay_slot
+ 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "11111111" // /* MW 3 */
+ 2846 "11101101" // /* MW 2 */
+ 2847 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00001001" // /* MW 3 */
+ 2850 "00100010" // /* MW 2 */
+ 2851 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00100111" // /* MW 3 */
+ 2854 "01100010" // /* MW 2 */
+ 2855 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */
+ 2857 "00000001" // /* MW 5 */
+ 2858 "01000000" // /* MW 4 */
+ 2859 "10100000" // /* MW 3 */
+ 2860 "00000111" // /* MW 2 */
+ 2861 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2869 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.delay_slot
+ 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001101" // /* MW 3 */
+ 2872 "00001110" // /* MW 2 */
+ 2873 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00100111" // /* MW 3 */
+ 2876 "11000100" // /* MW 2 */
+ 2877 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */
+ 2879 "00000001" // /* MW 5 */
+ 2880 "01000000" // /* MW 4 */
+ 2881 "01010000" // /* MW 3 */
+ 2882 "00000111" // /* MW 2 */
+ 2883 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2893 "00000000" // /* MW 1 */
+ 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */
+ 2895 "00000000" // /* MW 5 */
+ 2896 "00000000" // /* MW 4 */
+ 2897 "11110000" // /* MW 3 */
+ 2898 "00000110" // /* MW 2 */
+ 2899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "00010001" // /* MW 3 */
+ 2902 "00110100" // /* MW 2 */
+ 2903 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2907 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2909 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2911 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "00010101" // /* MW 3 */
+ 2914 "00111010" // /* MW 2 */
+ 2915 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "00101010" // /* MW 3 */
+ 2918 "01110000" // /* MW 2 */
+ 2919 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */
+ 2921 "00000001" // /* MW 5 */
+ 2922 "01000000" // /* MW 4 */
+ 2923 "01010000" // /* MW 3 */
+ 2924 "00000110" // /* MW 2 */
+ 2925 "11000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2933 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 316 38
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2935 "00010001" // /* MW 3 */
+ 2936 "00110100" // /* MW 2 */
+ 2937 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2939 "00100111" // /* MW 3 */
+ 2940 "10100010" // /* MW 2 */
+ 2941 "00010110" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */
+ 2943 "00000001" // /* MW 5 */
+ 2944 "01000000" // /* MW 4 */
+ 2945 "00010000" // /* MW 3 */
+ 2946 "00000110" // /* MW 2 */
+ 2947 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2953 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2957 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00101000" // /* MW 3 */
+ 2960 "01000100" // /* MW 2 */
+ 2961 "00010111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 2963 "00000001" // /* MW 5 */
+ 2964 "01000000" // /* MW 4 */
+ 2965 "11110000" // /* MW 3 */
+ 2966 "00000110" // /* MW 2 */
+ 2967 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2969 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2971 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2973 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2975 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2977 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 286 44 first
+.src_ref 2 "reduce_base_c8.h" 289 38
+.src_ref 2 "reduce_base_c8.h" 291 40
+.src_ref 2 "reduce_base_c8.h" 291 40
+ 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2979 "01011000" // /* MW 9 */
+ 2980 "11101100" // /* MW 8 */
+ 2981 "00000111" // /* MW 7 */
+ 2982 "00001000" // /* MW 6 */
+ 2983 "00100010" // /* MW 5 */
+ 2984 "00000000" // /* MW 4 */
+ 2985 "11100000" // /* MW 3 */
+ 2986 "11010110" // /* MW 2 */
+ 2987 "10000011" // /* MW 1 */
+ 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "10100000" // /* MW 3 */
+ 2990 "10011100" // /* MW 2 */
+ 2991 "00011111" // /* MW 1 */
+ 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2993 "00000000" // /* MW 1 */
+ 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2995 "00000000" // /* MW 1 */
+ 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2997 "00000000" // /* MW 1 */
+ 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2999 "00000000" // /* MW 1 */
+ 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 287 38 first
+ 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3003 "11110111" // /* MW 3 */
+ 3004 "00011100" // /* MW 2 */
+ 3005 "00000100" // /* MW 1 */
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+ 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3017 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 288 39 first
+ 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3019 "11110111" // /* MW 3 */
+ 3020 "00011110" // /* MW 2 */
+ 3021 "00000100" // /* MW 1 */
+ 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3023 "00000000" // /* MW 1 */
+ 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3025 "00000000" // /* MW 1 */
+ 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3027 "00000000" // /* MW 1 */
+ 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3029 "00000000" // /* MW 1 */
+ 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3031 "00000000" // /* MW 1 */
+ 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3033 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 289 38 first
+ 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3035 "01010111" // /* MW 3 */
+ 3036 "00011100" // /* MW 2 */
+ 3037 "00000100" // /* MW 1 */
+ 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3039 "00000000" // /* MW 1 */
+ 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3041 "00000000" // /* MW 1 */
+ 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3043 "00000000" // /* MW 1 */
+ 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3045 "00000000" // /* MW 1 */
+ 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3047 "00000000" // /* MW 1 */
+ 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3049 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 290 39 first
+ 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3051 "00110111" // /* MW 3 */
+ 3052 "00011100" // /* MW 2 */
+ 3053 "00000100" // /* MW 1 */
+ 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3055 "00000000" // /* MW 1 */
+ 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3057 "00000000" // /* MW 1 */
+ 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3059 "00000000" // /* MW 1 */
+ 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3061 "00000000" // /* MW 1 */
+ 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3063 "00000000" // /* MW 1 */
+ 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3065 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 291 40 first
+ 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3067 "01010111" // /* MW 3 */
+ 3068 "00001000" // /* MW 2 */
+ 3069 "00000100" // /* MW 1 */
+ 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3071 "00000000" // /* MW 1 */
+ 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3073 "00000000" // /* MW 1 */
+ 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3075 "00000000" // /* MW 5 */
+ 3076 "00000000" // /* MW 4 */
+ 3077 "11101000" // /* MW 3 */
+ 3078 "00000110" // /* MW 2 */
+ 3079 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3081 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3083 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3085 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 292 38 first
+.delay_slot
+ 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3087 "01010001" // /* MW 3 */
+ 3088 "00000110" // /* MW 2 */
+ 3089 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 293 38 first
+.delay_slot
+ 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3091 "00011100" // /* MW 13 */
+ 3092 "00000000" // /* MW 12 */
+ 3093 "00000000" // /* MW 11 */
+ 3094 "01010111" // /* MW 10 */
+ 3095 "00011010" // /* MW 9 */
+ 3096 "01000000" // /* MW 8 */
+ 3097 "00000000" // /* MW 7 */
+ 3098 "00000000" // /* MW 6 */
+ 3099 "10100011" // /* MW 5 */
+ 3100 "00101001" // /* MW 4 */
+ 3101 "11111000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528
+.src_ref 2 "reduce_base_c8.h" 274 44 first
+.src_ref 2 "reduce_base_c8.h" 275 40
+.src_ref 2 "reduce_base_c8.h" 275 40
+ 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00010000" // /* MW 8 */
+ 3107 "01001000" // /* MW 7 */
+ 3108 "10101000" // /* MW 6 */
+ 3109 "01100111" // /* MW 5 */
+ 3110 "00111110" // /* MW 4 */
+ 3111 "11100000" // /* MW 3 */
+ 3112 "10010010" // /* MW 2 */
+ 3113 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 40 first
+.src_ref 2 "reduce_base_c8.h" 279 40
+ 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01111000" // /* MW 9 */
+ 3116 "00001110" // /* MW 8 */
+ 3117 "11010000" // /* MW 7 */
+ 3118 "00110011" // /* MW 6 */
+ 3119 "00100010" // /* MW 5 */
+ 3120 "00001100" // /* MW 4 */
+ 3121 "10000000" // /* MW 3 */
+ 3122 "10000000" // /* MW 2 */
+ 3123 "11111101" // /* MW 1 */
+ 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3125 "00000000" // /* MW 1 */
+ 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3127 "00000000" // /* MW 1 */
+ 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3129 "00000000" // /* MW 1 */
+ 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3131 "00000000" // /* MW 1 */
+ 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38
+ 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3135 "01010111" // /* MW 3 */
+ 3136 "00011100" // /* MW 2 */
+ 3137 "00000100" // /* MW 1 */
+ 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3139 "00000000" // /* MW 1 */
+ 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3141 "00000000" // /* MW 1 */
+ 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3143 "00000000" // /* MW 1 */
+ 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3145 "00000000" // /* MW 1 */
+ 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3147 "00000000" // /* MW 1 */
+ 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 276 39 first
+ 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3151 "11110111" // /* MW 3 */
+ 3152 "00011110" // /* MW 2 */
+ 3153 "00000100" // /* MW 1 */
+ 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3155 "00000000" // /* MW 1 */
+ 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3157 "00000000" // /* MW 1 */
+ 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3159 "00000000" // /* MW 1 */
+ 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3161 "00000000" // /* MW 1 */
+ 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3163 "00000000" // /* MW 1 */
+ 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3165 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 275 38 first
+.src_ref 2 "reduce_base_c8.h" 277 38 first
+ 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3167 "01010111" // /* MW 3 */
+ 3168 "00011100" // /* MW 2 */
+ 3169 "00000100" // /* MW 1 */
+ 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3171 "00000000" // /* MW 1 */
+ 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3173 "00000000" // /* MW 1 */
+ 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3175 "00000000" // /* MW 1 */
+ 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3177 "00000000" // /* MW 1 */
+ 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3179 "00000000" // /* MW 1 */
+ 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3181 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 278 39 first
+ 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3183 "10110111" // /* MW 3 */
+ 3184 "00011100" // /* MW 2 */
+ 3185 "00000100" // /* MW 1 */
+ 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3187 "00000000" // /* MW 1 */
+ 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3189 "00000000" // /* MW 1 */
+ 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3191 "00000000" // /* MW 1 */
+ 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3193 "00000000" // /* MW 1 */
+ 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3195 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3197 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3199 "00110111" // /* MW 3 */
+ 3200 "00001000" // /* MW 2 */
+ 3201 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3205 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3207 "00000000" // /* MW 5 */
+ 3208 "00000000" // /* MW 4 */
+ 3209 "11101000" // /* MW 3 */
+ 3210 "00000110" // /* MW 2 */
+ 3211 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 279 40
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3213 "01000001" // /* MW 3 */
+ 3214 "00000010" // /* MW 2 */
+ 3215 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3219 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 281 38 first
+.delay_slot
+ 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3221 "01110001" // /* MW 3 */
+ 3222 "00010100" // /* MW 2 */
+ 3223 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 280 38 first
+.delay_slot
+ 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3225 "01110000" // /* MW 7 */
+ 3226 "10100101" // /* MW 6 */
+ 3227 "00000001" // /* MW 5 */
+ 3228 "00000000" // /* MW 4 */
+ 3229 "00110000" // /* MW 3 */
+ 3230 "11000010" // /* MW 2 */
+ 3231 "10000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656
+.src_ref 2 "reduce_base_c8.h" 236 8
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 302 76
+ 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3233 "00110010" // /* MW 5 */
+ 3234 "00010000" // /* MW 4 */
+ 3235 "00100000" // /* MW 3 */
+ 3236 "10001110" // /* MW 2 */
+ 3237 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8 first
+ 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3239 "01000111" // /* MW 3 */
+ 3240 "10001000" // /* MW 2 */
+ 3241 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */
+ 3243 "00000001" // /* MW 5 */
+ 3244 "01000000" // /* MW 4 */
+ 3245 "10101000" // /* MW 3 */
+ 3246 "00000110" // /* MW 2 */
+ 3247 "00100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 298 44
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 310 44
+.src_ref 2 "reduce_base_c8.h" 311 38
+.delay_slot
+ 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "01000001" // /* MW 3 */
+ 3250 "00000010" // /* MW 2 */
+ 3251 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3259 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3261 "00011101" // /* MW 3 */
+ 3262 "00000110" // /* MW 2 */
+ 3263 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3265 "00101000" // /* MW 3 */
+ 3266 "11000100" // /* MW 2 */
+ 3267 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 236 8
+ 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */
+ 3269 "00000001" // /* MW 5 */
+ 3270 "01000000" // /* MW 4 */
+ 3271 "11110000" // /* MW 3 */
+ 3272 "00000110" // /* MW 2 */
+ 3273 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3275 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3283 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 310 44 first
+.src_ref 2 "reduce_base_c8.h" 312 41 first
+.src_ref 2 "reduce_base_c8.h" 315 40
+ 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3285 "01011000" // /* MW 9 */
+ 3286 "11101100" // /* MW 8 */
+ 3287 "00000111" // /* MW 7 */
+ 3288 "11111000" // /* MW 6 */
+ 3289 "00101111" // /* MW 5 */
+ 3290 "00100110" // /* MW 4 */
+ 3291 "11100000" // /* MW 3 */
+ 3292 "10000110" // /* MW 2 */
+ 3293 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38
+.src_ref 2 "reduce_base_c8.h" 317 97
+ 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3295 "00010000" // /* MW 9 */
+ 3296 "00000000" // /* MW 8 */
+ 3297 "01000000" // /* MW 7 */
+ 3298 "01000000" // /* MW 6 */
+ 3299 "00000000" // /* MW 5 */
+ 3300 "00000000" // /* MW 4 */
+ 3301 "00000000" // /* MW 3 */
+ 3302 "01000011" // /* MW 2 */
+ 3303 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40
+.src_ref 2 "reduce_base_c8.h" 317 97 first
+ 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3305 "00000001" // /* MW 5 */
+ 3306 "00100000" // /* MW 4 */
+ 3307 "10111100" // /* MW 3 */
+ 3308 "11000111" // /* MW 2 */
+ 3309 "11100000" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+ 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3317 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 311 38 first
+ 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3319 "00110111" // /* MW 3 */
+ 3320 "00011100" // /* MW 2 */
+ 3321 "00000100" // /* MW 1 */
+ 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3323 "00000000" // /* MW 1 */
+ 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3325 "00000000" // /* MW 1 */
+ 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3327 "00000000" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 312 39 first
+ 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3335 "01010111" // /* MW 3 */
+ 3336 "00011100" // /* MW 2 */
+ 3337 "00000100" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+ 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3341 "00000000" // /* MW 1 */
+ 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3343 "00000000" // /* MW 1 */
+ 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3345 "00000000" // /* MW 1 */
+ 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3347 "00000000" // /* MW 1 */
+ 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3349 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 313 38 first
+ 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3351 "01000001" // /* MW 3 */
+ 3352 "00011100" // /* MW 2 */
+ 3353 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 315 40 first
+ 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3355 "00010111" // /* MW 3 */
+ 3356 "00001011" // /* MW 2 */
+ 3357 "00000100" // /* MW 1 */
+ 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3359 "00000000" // /* MW 1 */
+ 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3361 "00000000" // /* MW 1 */
+ 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3363 "00000000" // /* MW 1 */
+ 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3365 "00000000" // /* MW 1 */
+ 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3367 "00000000" // /* MW 1 */
+ 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3369 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 316 38 first
+ 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3371 "01010001" // /* MW 3 */
+ 3372 "00000111" // /* MW 2 */
+ 3373 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 317 38 first
+ 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3375 "01110001" // /* MW 3 */
+ 3376 "00010100" // /* MW 2 */
+ 3377 "00001100" // /* MW 1 */
+ 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3379 "00000000" // /* MW 5 */
+ 3380 "00000000" // /* MW 4 */
+ 3381 "11101000" // /* MW 3 */
+ 3382 "00000110" // /* MW 2 */
+ 3383 "00000000" // /* MW 1 */
+.delay_slot
+ 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3385 "10100000" // /* MW 3 */
+ 3386 "10011111" // /* MW 2 */
+ 3387 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 3395 "00011100" // /* MW 13 */
+ 3396 "00000000" // /* MW 12 */
+ 3397 "00000000" // /* MW 11 */
+ 3398 "01010111" // /* MW 10 */
+ 3399 "00011010" // /* MW 9 */
+ 3400 "01000000" // /* MW 8 */
+ 3401 "00000000" // /* MW 7 */
+ 3402 "00000000" // /* MW 6 */
+ 3403 "10110110" // /* MW 5 */
+ 3404 "00000010" // /* MW 4 */
+ 3405 "11110000" // /* MW 3 */
+ 3406 "00101100" // /* MW 2 */
+ 3407 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832
+.src_ref 2 "reduce_base_c8.h" 298 44 first
+.src_ref 2 "reduce_base_c8.h" 301 40
+.src_ref 2 "reduce_base_c8.h" 301 40 first
+ 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3409 "01011000" // /* MW 9 */
+ 3410 "00010000" // /* MW 8 */
+ 3411 "01001000" // /* MW 7 */
+ 3412 "01110000" // /* MW 6 */
+ 3413 "00101011" // /* MW 5 */
+ 3414 "00000110" // /* MW 4 */
+ 3415 "11100000" // /* MW 3 */
+ 3416 "10000110" // /* MW 2 */
+ 3417 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76
+.src_ref 2 "reduce_base_c8.h" 303 40
+.src_ref 2 "reduce_base_c8.h" 306 62
+ 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3419 "01111000" // /* MW 9 */
+ 3420 "00001110" // /* MW 8 */
+ 3421 "11010000" // /* MW 7 */
+ 3422 "10101000" // /* MW 6 */
+ 3423 "01000111" // /* MW 5 */
+ 3424 "00111110" // /* MW 4 */
+ 3425 "10000000" // /* MW 3 */
+ 3426 "10000000" // /* MW 2 */
+ 3427 "11111101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 76 first
+ 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3429 "01001101" // /* MW 3 */
+ 3430 "11001000" // /* MW 2 */
+ 3431 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 41
+.src_ref 2 "reduce_base_c8.h" 306 62 first
+ 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3433 "11111111" // /* MW 5 */
+ 3434 "10100100" // /* MW 4 */
+ 3435 "11110001" // /* MW 3 */
+ 3436 "10001101" // /* MW 2 */
+ 3437 "11110111" // /* MW 1 */
+ 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3439 "00000000" // /* MW 1 */
+ 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3441 "00000000" // /* MW 1 */
+ 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3443 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 299 38 first
+ 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3445 "01110111" // /* MW 3 */
+ 3446 "00011111" // /* MW 2 */
+ 3447 "00000100" // /* MW 1 */
+ 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3449 "00000000" // /* MW 1 */
+ 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3451 "00000000" // /* MW 1 */
+ 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3453 "00000000" // /* MW 1 */
+ 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3455 "00000000" // /* MW 1 */
+ 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3457 "00000000" // /* MW 1 */
+ 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3459 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 300 39 first
+ 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3461 "10110111" // /* MW 3 */
+ 3462 "00011100" // /* MW 2 */
+ 3463 "00000100" // /* MW 1 */
+ 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3465 "00000000" // /* MW 1 */
+ 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3467 "00000000" // /* MW 1 */
+ 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3469 "00000000" // /* MW 1 */
+ 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3471 "00000000" // /* MW 1 */
+ 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3473 "00000000" // /* MW 1 */
+ 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3475 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 301 38 first
+ 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3477 "01010111" // /* MW 3 */
+ 3478 "00011100" // /* MW 2 */
+ 3479 "00000100" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+ 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3491 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 302 39 first
+ 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3493 "01110111" // /* MW 3 */
+ 3494 "00011100" // /* MW 2 */
+ 3495 "00000100" // /* MW 1 */
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+ 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3507 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 303 40 first
+ 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3509 "00110111" // /* MW 3 */
+ 3510 "00001000" // /* MW 2 */
+ 3511 "00000100" // /* MW 1 */
+ 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3513 "00000000" // /* MW 1 */
+ 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3515 "00000000" // /* MW 1 */
+ 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3517 "00000000" // /* MW 1 */
+ 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3519 "00000000" // /* MW 1 */
+ 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3521 "00000000" // /* MW 1 */
+ 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3523 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 304 38 first
+ 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3525 "00110001" // /* MW 3 */
+ 3526 "00000110" // /* MW 2 */
+ 3527 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 305 38 first
+ 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3529 "01110000" // /* MW 7 */
+ 3530 "10100101" // /* MW 6 */
+ 3531 "00000001" // /* MW 5 */
+ 3532 "00000000" // /* MW 4 */
+ 3533 "00110000" // /* MW 3 */
+ 3534 "11010010" // /* MW 2 */
+ 3535 "10000010" // /* MW 1 */
+.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+ 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3537 "01000000" // /* MW 3 */
+ 3538 "10000000" // /* MW 2 */
+ 3539 "00011000" // /* MW 1 */
+ 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3541 "10000001" // /* MW 11 */
+ 3542 "10101101" // /* MW 10 */
+ 3543 "00000000" // /* MW 9 */
+ 3544 "00000000" // /* MW 8 */
+ 3545 "00000000" // /* MW 7 */
+ 3546 "00000000" // /* MW 6 */
+ 3547 "00100000" // /* MW 5 */
+ 3548 "00000000" // /* MW 4 */
+ 3549 "11100000" // /* MW 3 */
+ 3550 "01111010" // /* MW 2 */
+ 3551 "01100000" // /* MW 1 */
+.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv
+.src_ref 2 "reduce_base_c8.h" 326 79 first
+.src_ref 2 "reduce_base_c8.h" 329 51
+ 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3553 "00011110" // /* MW 5 */
+ 3554 "11000000" // /* MW 4 */
+ 3555 "10000000" // /* MW 3 */
+ 3556 "00001000" // /* MW 2 */
+ 3557 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 26
+.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first
+ 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3559 "01011000" // /* MW 9 */
+ 3560 "11100010" // /* MW 8 */
+ 3561 "00000111" // /* MW 7 */
+ 3562 "00001000" // /* MW 6 */
+ 3563 "00000010" // /* MW 5 */
+ 3564 "00000000" // /* MW 4 */
+ 3565 "11010000" // /* MW 3 */
+ 3566 "10001010" // /* MW 2 */
+ 3567 "01000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3569 "10011001" // /* MW 5 */
+ 3570 "00000000" // /* MW 4 */
+ 3571 "01010010" // /* MW 3 */
+ 3572 "10001110" // /* MW 2 */
+ 3573 "01000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first
+ 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3575 "00110110" // /* MW 3 */
+ 3576 "00010100" // /* MW 2 */
+ 3577 "00000010" // /* MW 1 */
+ 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3579 "00000000" // /* MW 1 */
+ 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3581 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 326 28 first
+ 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3583 "11110111" // /* MW 3 */
+ 3584 "00101111" // /* MW 2 */
+ 3585 "00000000" // /* MW 1 */
+ 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3587 "00000000" // /* MW 1 */
+ 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3589 "00000000" // /* MW 1 */
+ 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3591 "00000000" // /* MW 1 */
+ 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3593 "00000000" // /* MW 1 */
+ 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3595 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3597 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3599 "00010111" // /* MW 3 */
+ 3600 "01011111" // /* MW 2 */
+ 3601 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3603 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3605 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3607 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3609 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 327 31
+.src_ref 2 "reduce_base_c8.h" 328 23
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00000001" // /* MW 3 */
+ 3612 "00110000" // /* MW 2 */
+ 3613 "00010000" // /* MW 1 */
+ 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3615 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 328 23 first
+ 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3617 "00010111" // /* MW 3 */
+ 3618 "11001111" // /* MW 2 */
+ 3619 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 51 first
+ 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3621 "10011010" // /* MW 3 */
+ 3622 "01001000" // /* MW 2 */
+ 3623 "00000000" // /* MW 1 */
+ 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3625 "00000000" // /* MW 1 */
+ 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3627 "00000000" // /* MW 1 */
+ 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3629 "00000000" // /* MW 1 */
+ 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3633 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 28
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3635 "00010111" // /* MW 3 */
+ 3636 "11111100" // /* MW 2 */
+ 3637 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.src_ref 2 "reduce_base_c8.h" 330 28
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3639 "11111111" // /* MW 5 */
+ 3640 "10100100" // /* MW 4 */
+ 3641 "10110010" // /* MW 3 */
+ 3642 "00110101" // /* MW 2 */
+ 3643 "00100001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 329 30
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3645 "01000001" // /* MW 3 */
+ 3646 "00000000" // /* MW 2 */
+ 3647 "00010000" // /* MW 1 */
+ 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3649 "00000000" // /* MW 1 */
+ 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3651 "00000000" // /* MW 1 */
+ 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3653 "00000000" // /* MW 1 */
+ 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3655 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 330 26 first
+ 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3657 "10110111" // /* MW 3 */
+ 3658 "00001000" // /* MW 2 */
+ 3659 "00000000" // /* MW 1 */
+ 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3661 "00000000" // /* MW 1 */
+ 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3663 "00000000" // /* MW 1 */
+ 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3665 "00000000" // /* MW 1 */
+ 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3667 "00000000" // /* MW 1 */
+ 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3669 "00000000" // /* MW 1 */
+ 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3671 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 331 24 first
+ 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3673 "01110111" // /* MW 3 */
+ 3674 "00101010" // /* MW 2 */
+ 3675 "00000000" // /* MW 1 */
+ 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3677 "00000000" // /* MW 1 */
+ 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3679 "00000000" // /* MW 1 */
+ 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3681 "00000000" // /* MW 1 */
+ 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3683 "00000000" // /* MW 1 */
+ 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3685 "00000000" // /* MW 1 */
+ 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3687 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first
+ 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3689 "01000111" // /* MW 3 */
+ 3690 "11101100" // /* MW 2 */
+ 3691 "00000000" // /* MW 1 */
+ 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3693 "00000000" // /* MW 1 */
+ 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3695 "00000000" // /* MW 1 */
+ 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3697 "00000000" // /* MW 1 */
+ 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3699 "00000000" // /* MW 1 */
+ 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3701 "00000000" // /* MW 1 */
+ 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3703 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first
+ 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3705 "01110111" // /* MW 3 */
+ 3706 "00000100" // /* MW 2 */
+ 3707 "00000000" // /* MW 1 */
+ 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3709 "00000000" // /* MW 1 */
+ 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3711 "00000000" // /* MW 1 */
+ 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3713 "00000000" // /* MW 1 */
+ 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3715 "00000000" // /* MW 1 */
+ 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3717 "00000000" // /* MW 1 */
+ 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3719 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first
+ 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3721 "00100111" // /* MW 3 */
+ 3722 "11100100" // /* MW 2 */
+ 3723 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4 first
+ 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 3725 "00000000" // /* MW 3 */
+ 3726 "00101000" // /* MW 2 */
+ 3727 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 349 4
+.delay_slot
+ 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3729 "00000001" // /* MW 5 */
+ 3730 "00000000" // /* MW 4 */
+ 3731 "00000000" // /* MW 3 */
+ 3732 "11111000" // /* MW 2 */
+ 3733 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3735 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3737 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3739 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3741 "01100111" // /* MW 3 */
+ 3742 "00000001" // /* MW 2 */
+ 3743 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168
+.src_ref 2 "reduce_base_c8.h" 262 44 first
+.src_ref 2 "reduce_base_c8.h" 263 77
+ 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3745 "00010000" // /* MW 9 */
+ 3746 "11110100" // /* MW 8 */
+ 3747 "10101111" // /* MW 7 */
+ 3748 "00111100" // /* MW 6 */
+ 3749 "00000000" // /* MW 5 */
+ 3750 "00000000" // /* MW 4 */
+ 3751 "11100000" // /* MW 3 */
+ 3752 "11010110" // /* MW 2 */
+ 3753 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.src_ref 2 "reduce_base_c8.h" 263 77 first
+.src_ref 2 "reduce_base_c8.h" 267 40
+ 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3755 "01011000" // /* MW 9 */
+ 3756 "11101100" // /* MW 8 */
+ 3757 "00000111" // /* MW 7 */
+ 3758 "00000100" // /* MW 6 */
+ 3759 "01111101" // /* MW 5 */
+ 3760 "00001010" // /* MW 4 */
+ 3761 "00100000" // /* MW 3 */
+ 3762 "10001010" // /* MW 2 */
+ 3763 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3765 "10010000" // /* MW 9 */
+ 3766 "11111111" // /* MW 8 */
+ 3767 "11001111" // /* MW 7 */
+ 3768 "00111100" // /* MW 6 */
+ 3769 "00000000" // /* MW 5 */
+ 3770 "00000000" // /* MW 4 */
+ 3771 "00000000" // /* MW 3 */
+ 3772 "10011010" // /* MW 2 */
+ 3773 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 118 first
+ 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3775 "01100000" // /* MW 3 */
+ 3776 "11100010" // /* MW 2 */
+ 3777 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 98
+.src_ref 2 "reduce_base_c8.h" 267 116 first
+ 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3779 "01000110" // /* MW 3 */
+ 3780 "01111010" // /* MW 2 */
+ 3781 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 60 first
+.src_ref 2 "reduce_base_c8.h" 265 98 first
+ 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3783 "01001110" // /* MW 3 */
+ 3784 "01101010" // /* MW 2 */
+ 3785 "00010100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3787 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 38 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3789 "01010111" // /* MW 3 */
+ 3790 "00011100" // /* MW 2 */
+ 3791 "00000100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 263 56
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3793 "00101111" // /* MW 3 */
+ 3794 "11000100" // /* MW 2 */
+ 3795 "00010001" // /* MW 1 */
+ 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3797 "00000000" // /* MW 1 */
+ 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3799 "00000000" // /* MW 1 */
+ 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3801 "00000000" // /* MW 1 */
+ 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3803 "00000000" // /* MW 1 */
+ 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3805 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 264 39 first
+ 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3807 "11010111" // /* MW 3 */
+ 3808 "00011110" // /* MW 2 */
+ 3809 "00000100" // /* MW 1 */
+ 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3811 "00000000" // /* MW 1 */
+ 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3813 "00000000" // /* MW 1 */
+ 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3815 "00000000" // /* MW 1 */
+ 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3817 "00000000" // /* MW 1 */
+ 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3819 "00000000" // /* MW 1 */
+ 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3821 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 265 38 first
+ 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3823 "10110111" // /* MW 3 */
+ 3824 "00011110" // /* MW 2 */
+ 3825 "00000100" // /* MW 1 */
+ 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3827 "00000000" // /* MW 1 */
+ 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3829 "00000000" // /* MW 1 */
+ 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3831 "00000000" // /* MW 1 */
+ 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3833 "00000000" // /* MW 1 */
+ 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3835 "00000000" // /* MW 1 */
+ 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3837 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 266 39 first
+ 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3839 "00110111" // /* MW 3 */
+ 3840 "00011100" // /* MW 2 */
+ 3841 "00000100" // /* MW 1 */
+ 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3843 "00000000" // /* MW 1 */
+ 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3845 "00000000" // /* MW 1 */
+ 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3847 "00000000" // /* MW 1 */
+ 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3849 "00000000" // /* MW 1 */
+ 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3851 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3853 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 40 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3855 "01010111" // /* MW 3 */
+ 3856 "00001000" // /* MW 2 */
+ 3857 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3859 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3861 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 3863 "00000000" // /* MW 5 */
+ 3864 "00000000" // /* MW 4 */
+ 3865 "11101000" // /* MW 3 */
+ 3866 "00000110" // /* MW 2 */
+ 3867 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3869 "01000001" // /* MW 3 */
+ 3870 "00001010" // /* MW 2 */
+ 3871 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 267 42
+.delay_slot
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3873 "11010001" // /* MW 3 */
+ 3874 "01000101" // /* MW 2 */
+ 3875 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 270 64
+.delay_slot
+ 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3877 "00011100" // /* MW 3 */
+ 3878 "10100001" // /* MW 2 */
+ 3879 "00011001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 268 38 first
+.delay_slot
+ 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3881 "01110000" // /* MW 7 */
+ 3882 "10100101" // /* MW 6 */
+ 3883 "00000001" // /* MW 5 */
+ 3884 "00000000" // /* MW 4 */
+ 3885 "00110000" // /* MW 3 */
+ 3886 "10001110" // /* MW 2 */
+ 3887 "10000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 269 38 first
+.src_ref 2 "reduce_base_c8.h" 270 64 first
+.delay_slot
+ 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3889 "00000000" // /* MW 15 */
+ 3890 "00000000" // /* MW 14 */
+ 3891 "01111000" // /* MW 13 */
+ 3892 "10100101" // /* MW 12 */
+ 3893 "00000001" // /* MW 11 */
+ 3894 "01111100" // /* MW 10 */
+ 3895 "11100011" // /* MW 9 */
+ 3896 "10111101" // /* MW 8 */
+ 3897 "00010001" // /* MW 7 */
+ 3898 "00010110" // /* MW 6 */
+ 3899 "00100100" // /* MW 5 */
+ 3900 "00000000" // /* MW 4 */
+ 3901 "11110000" // /* MW 3 */
+ 3902 "00101100" // /* MW 2 */
+ 3903 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328
+.src_ref 2 "reduce_base_c8.h" 250 44
+.src_ref 2 "reduce_base_c8.h" 250 44 first
+.src_ref 2 "reduce_base_c8.h" 255 40
+ 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "01011000" // /* MW 9 */
+ 3906 "11101100" // /* MW 8 */
+ 3907 "00000111" // /* MW 7 */
+ 3908 "00001000" // /* MW 6 */
+ 3909 "01000010" // /* MW 5 */
+ 3910 "00000000" // /* MW 4 */
+ 3911 "11100000" // /* MW 3 */
+ 3912 "10010010" // /* MW 2 */
+ 3913 "10000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 255 113 first
+ 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "01011000" // /* MW 9 */
+ 3916 "00001000" // /* MW 8 */
+ 3917 "01001000" // /* MW 7 */
+ 3918 "01110000" // /* MW 6 */
+ 3919 "00101101" // /* MW 5 */
+ 3920 "00000110" // /* MW 4 */
+ 3921 "00100000" // /* MW 3 */
+ 3922 "10000110" // /* MW 2 */
+ 3923 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.src_ref 2 "reduce_base_c8.h" 329 30
+ 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3925 "00010001" // /* MW 5 */
+ 3926 "00100000" // /* MW 4 */
+ 3927 "00101101" // /* MW 3 */
+ 3928 "11001000" // /* MW 2 */
+ 3929 "00000000" // /* MW 1 */
+ 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3931 "00000000" // /* MW 1 */
+ 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3933 "00000000" // /* MW 1 */
+ 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3935 "00000000" // /* MW 1 */
+ 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3937 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 251 38 first
+ 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3939 "01110111" // /* MW 3 */
+ 3940 "00011111" // /* MW 2 */
+ 3941 "00000100" // /* MW 1 */
+ 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3943 "00000000" // /* MW 1 */
+ 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3945 "00000000" // /* MW 1 */
+ 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3947 "00000000" // /* MW 1 */
+ 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3949 "00000000" // /* MW 1 */
+ 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3951 "00000000" // /* MW 1 */
+ 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 252 39 first
+ 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3955 "10110111" // /* MW 3 */
+ 3956 "00011100" // /* MW 2 */
+ 3957 "00000100" // /* MW 1 */
+ 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3959 "00000000" // /* MW 1 */
+ 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3961 "00000000" // /* MW 1 */
+ 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3963 "00000000" // /* MW 1 */
+ 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3965 "00000000" // /* MW 1 */
+ 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3967 "00000000" // /* MW 1 */
+ 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3969 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 253 38 first
+ 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3971 "01110111" // /* MW 3 */
+ 3972 "00011111" // /* MW 2 */
+ 3973 "00000100" // /* MW 1 */
+ 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3975 "00000000" // /* MW 1 */
+ 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3977 "00000000" // /* MW 1 */
+ 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3979 "00000000" // /* MW 1 */
+ 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3981 "00000000" // /* MW 1 */
+ 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3983 "00000000" // /* MW 1 */
+ 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3985 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 254 39 first
+ 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3987 "11010111" // /* MW 3 */
+ 3988 "00011110" // /* MW 2 */
+ 3989 "00000100" // /* MW 1 */
+ 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3991 "00000000" // /* MW 1 */
+ 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3993 "00000000" // /* MW 1 */
+ 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3995 "00000000" // /* MW 1 */
+ 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3997 "00000000" // /* MW 1 */
+ 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3999 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4001 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 40 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4003 "01110111" // /* MW 3 */
+ 4004 "00001000" // /* MW 2 */
+ 4005 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4007 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4009 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4011 "00000000" // /* MW 5 */
+ 4012 "00000000" // /* MW 4 */
+ 4013 "11101000" // /* MW 3 */
+ 4014 "00000110" // /* MW 2 */
+ 4015 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 255 42
+.src_ref 2 "reduce_base_c8.h" 255 113
+.delay_slot
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4017 "00100110" // /* MW 3 */
+ 4018 "01000110" // /* MW 2 */
+ 4019 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4023 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 256 38 first
+.delay_slot
+ 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4025 "11010001" // /* MW 3 */
+ 4026 "00000100" // /* MW 2 */
+ 4027 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 257 38 first
+.delay_slot
+ 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4029 "01010001" // /* MW 3 */
+ 4030 "00010110" // /* MW 2 */
+ 4031 "00001100" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456
+.src_ref 2 "reduce_base_c8.h" 238 44 first
+ 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4033 "10110111" // /* MW 3 */
+ 4034 "00011110" // /* MW 2 */
+ 4035 "00000100" // /* MW 1 */
+ 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4037 "00000000" // /* MW 1 */
+ 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4039 "00000000" // /* MW 1 */
+ 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4041 "00000000" // /* MW 1 */
+ 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4043 "00000000" // /* MW 1 */
+ 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4045 "00000000" // /* MW 1 */
+ 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4047 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 239 38 first
+ 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4049 "11110111" // /* MW 3 */
+ 4050 "00011100" // /* MW 2 */
+ 4051 "00000100" // /* MW 1 */
+ 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4053 "00000000" // /* MW 1 */
+ 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4055 "00000000" // /* MW 1 */
+ 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4057 "00000000" // /* MW 1 */
+ 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4059 "00000000" // /* MW 1 */
+ 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4061 "00000000" // /* MW 1 */
+ 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4063 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 240 39 first
+ 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4065 "11110111" // /* MW 3 */
+ 4066 "00011110" // /* MW 2 */
+ 4067 "00000100" // /* MW 1 */
+ 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4069 "00000000" // /* MW 1 */
+ 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4071 "00000000" // /* MW 1 */
+ 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4073 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+ 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4075 "01110001" // /* MW 3 */
+ 4076 "11111100" // /* MW 2 */
+ 4077 "00000111" // /* MW 1 */
+ 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4079 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4081 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 38 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4083 "00110111" // /* MW 3 */
+ 4084 "00011100" // /* MW 2 */
+ 4085 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4087 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4089 "11000000" // /* MW 5 */
+ 4090 "10111111" // /* MW 4 */
+ 4091 "11110000" // /* MW 3 */
+ 4092 "00000000" // /* MW 2 */
+ 4093 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4095 "10100000" // /* MW 3 */
+ 4096 "01000101" // /* MW 2 */
+ 4097 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 40
+.src_ref 2 "reduce_base_c8.h" 241 94
+.src_ref 2 "reduce_base_c8.h" 241 94
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4099 "01000001" // /* MW 5 */
+ 4100 "10100000" // /* MW 4 */
+ 4101 "11000000" // /* MW 3 */
+ 4102 "01000100" // /* MW 2 */
+ 4103 "00011000" // /* MW 1 */
+ 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4105 "00000000" // /* MW 1 */
+ 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4107 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 242 39 first
+ 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4109 "11010111" // /* MW 3 */
+ 4110 "00011110" // /* MW 2 */
+ 4111 "00000100" // /* MW 1 */
+ 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4113 "00000000" // /* MW 1 */
+ 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4115 "00000000" // /* MW 1 */
+ 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4117 "00000000" // /* MW 1 */
+ 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4119 "00000000" // /* MW 1 */
+ 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4121 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4123 "11011000" // /* MW 3 */
+ 4124 "00001111" // /* MW 2 */
+ 4125 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 40 first
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4127 "10110111" // /* MW 3 */
+ 4128 "00001000" // /* MW 2 */
+ 4129 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4131 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4133 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */
+ 4135 "00000000" // /* MW 5 */
+ 4136 "00000000" // /* MW 4 */
+ 4137 "11101000" // /* MW 3 */
+ 4138 "00000110" // /* MW 2 */
+ 4139 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 42
+.src_ref 2 "reduce_base_c8.h" 243 91
+.src_ref 2 "reduce_base_c8.h" 243 91
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4141 "01000001" // /* MW 5 */
+ 4142 "10100000" // /* MW 4 */
+ 4143 "11000010" // /* MW 3 */
+ 4144 "01001001" // /* MW 2 */
+ 4145 "10110001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4149 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 244 38 first
+.delay_slot
+ 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4151 "10010001" // /* MW 3 */
+ 4152 "00000110" // /* MW 2 */
+ 4153 "00001100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 245 38 first
+.src_ref 2 "reduce_base_c8.h" 329 30
+.delay_slot
+ 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4155 "01111001" // /* MW 9 */
+ 4156 "10001110" // /* MW 8 */
+ 4157 "11010000" // /* MW 7 */
+ 4158 "10001011" // /* MW 6 */
+ 4159 "10100000" // /* MW 5 */
+ 4160 "00000001" // /* MW 4 */
+ 4161 "00110000" // /* MW 3 */
+ 4162 "11000110" // /* MW 2 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0
+ 4163 "10000010" // /* MW 1 */
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t
+.src_ref 3 "pad_3d.h" 266 first
+.src_ref 3 "pad_3d.h" 465 37 first
+.src_ref 3 "pad_3d.h" 468 21 first
+.src_ref 3 "pad_3d.h" 471 29
+.src_ref 3 "pad_3d.h" 479 21
+.function_start
+ 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4177 "01011000" // /* MW 9 */
+ 4178 "11101000" // /* MW 8 */
+ 4179 "10000111" // /* MW 7 */
+ 4180 "11001000" // /* MW 6 */
+ 4181 "01000111" // /* MW 5 */
+ 4182 "00111110" // /* MW 4 */
+ 4183 "11010000" // /* MW 3 */
+ 4184 "10000010" // /* MW 2 */
+ 4185 "01000010" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 469 21 first
+.src_ref 3 "pad_3d.h" 478 21
+.src_ref 3 "pad_3d.h" 499 52
+.src_ref 3 "pad_3d.h" 511 25
+ 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4187 "01011000" // /* MW 9 */
+ 4188 "00000110" // /* MW 8 */
+ 4189 "00001000" // /* MW 7 */
+ 4190 "10101010" // /* MW 6 */
+ 4191 "00100111" // /* MW 5 */
+ 4192 "00111110" // /* MW 4 */
+ 4193 "11010000" // /* MW 3 */
+ 4194 "10000110" // /* MW 2 */
+ 4195 "01000101" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 470 21 first
+.src_ref 3 "pad_3d.h" 486 26
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 26
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22
+ 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4197 "01111000" // /* MW 9 */
+ 4198 "01100000" // /* MW 8 */
+ 4199 "01101000" // /* MW 7 */
+ 4200 "00001000" // /* MW 6 */
+ 4201 "10000000" // /* MW 5 */
+ 4202 "00000001" // /* MW 4 */
+ 4203 "11010000" // /* MW 3 */
+ 4204 "10010110" // /* MW 2 */
+ 4205 "01001111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 471 29 first
+ 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4207 "01010010" // /* MW 3 */
+ 4208 "00101010" // /* MW 2 */
+ 4209 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 472 25 first
+ 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4211 "11010110" // /* MW 3 */
+ 4212 "00011100" // /* MW 2 */
+ 4213 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 473 26 first
+ 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4215 "11110110" // /* MW 3 */
+ 4216 "00101100" // /* MW 2 */
+ 4217 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 475 24 first
+ 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4219 "00110110" // /* MW 3 */
+ 4220 "00000110" // /* MW 2 */
+ 4221 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 479 21 first
+ 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4223 "01001110" // /* MW 3 */
+ 4224 "00100110" // /* MW 2 */
+ 4225 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 477 23 first
+ 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4227 "10010110" // /* MW 3 */
+ 4228 "00100100" // /* MW 2 */
+ 4229 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 478 21 first
+ 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4231 "00101110" // /* MW 3 */
+ 4232 "01101000" // /* MW 2 */
+ 4233 "00010001" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 56 25 first
+ 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4235 "01110010" // /* MW 3 */
+ 4236 "01001001" // /* MW 2 */
+ 4237 "00011000" // /* MW 1 */
+ 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4239 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 45 first
+ 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4241 "01001111" // /* MW 3 */
+ 4242 "11100101" // /* MW 2 */
+ 4243 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 485 34
+ 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4245 "00010001" // /* MW 3 */
+ 4246 "01100111" // /* MW 2 */
+ 4247 "00010000" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 998 25 first
+ 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4249 "00101111" // /* MW 3 */
+ 4250 "11100111" // /* MW 2 */
+ 4251 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 43 first
+ 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4253 "00101111" // /* MW 3 */
+ 4254 "01100011" // /* MW 2 */
+ 4255 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13 first
+ 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4257 "00001101" // /* MW 3 */
+ 4258 "11100001" // /* MW 2 */
+ 4259 "00010100" // /* MW 1 */
+.src_ref 4 "array_helpers.hpp" 950 13
+.src_ref 3 "pad_3d.h" 486 26 first
+ 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4261 "10000010" // /* MW 5 */
+ 4262 "11000011" // /* MW 4 */
+ 4263 "00110100" // /* MW 3 */
+ 4264 "00100011" // /* MW 2 */
+ 4265 "11000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */
+ 4267 "00000001" // /* MW 5 */
+ 4268 "01000000" // /* MW 4 */
+ 4269 "10100000" // /* MW 3 */
+ 4270 "00001000" // /* MW 2 */
+ 4271 "10000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 487 22
+.delay_slot
+ 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "10010010" // /* MW 3 */
+ 4274 "00000000" // /* MW 2 */
+ 4275 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4277 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4283 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4285 "01100000" // /* MW 5 */
+ 4286 "11100010" // /* MW 4 */
+ 4287 "00010001" // /* MW 3 */
+ 4288 "00000000" // /* MW 2 */
+ 4289 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4291 "01100000" // /* MW 5 */
+ 4292 "11100010" // /* MW 4 */
+ 4293 "00010110" // /* MW 3 */
+ 4294 "00000000" // /* MW 2 */
+ 4295 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 486 4
+ 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4297 "01110000" // /* MW 7 */
+ 4298 "01010000" // /* MW 6 */
+ 4299 "10111100" // /* MW 5 */
+ 4300 "00000010" // /* MW 4 */
+ 4301 "01100000" // /* MW 3 */
+ 4302 "00101011" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+ 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "10100101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00100000" // /* MW 5 */
+ 4316 "00000000" // /* MW 4 */
+ 4317 "11110000" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "10100101" // /* MW 12 */
+ 4325 "00000001" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+ 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00100000" // /* MW 5 */
+ 4348 "00000000" // /* MW 4 */
+ 4349 "11110000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+ 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "01011011" // /* MW 7 */
+ 4362 "00000001" // /* MW 6 */
+ 4363 "00100000" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+ 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "10100101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "01011011" // /* MW 7 */
+ 4378 "00000001" // /* MW 6 */
+ 4379 "00100000" // /* MW 5 */
+ 4380 "00000000" // /* MW 4 */
+ 4381 "11110000" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+ 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "10100101" // /* MW 12 */
+ 4389 "00000001" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "01011011" // /* MW 7 */
+ 4394 "00000001" // /* MW 6 */
+ 4395 "00100000" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224
+.src_ref 3 "pad_3d.h" 487 22 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4401 "00000000" // /* MW 15 */
+ 4402 "00000000" // /* MW 14 */
+ 4403 "01111000" // /* MW 13 */
+ 4404 "10100101" // /* MW 12 */
+ 4405 "00000001" // /* MW 11 */
+ 4406 "00000000" // /* MW 10 */
+ 4407 "00000000" // /* MW 9 */
+ 4408 "10000000" // /* MW 8 */
+ 4409 "00000110" // /* MW 7 */
+ 4410 "00011100" // /* MW 6 */
+ 4411 "00100010" // /* MW 5 */
+ 4412 "00000000" // /* MW 4 */
+ 4413 "11110000" // /* MW 3 */
+ 4414 "00101100" // /* MW 2 */
+ 4415 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240
+.src_ref 3 "pad_3d.h" 495 21
+.src_ref 3 "pad_3d.h" 495 40 first
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 38 first
+.loop_nesting 0
+ 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4417 "10101000" // /* MW 9 */
+ 4418 "11001100" // /* MW 8 */
+ 4419 "00101001" // /* MW 7 */
+ 4420 "11111110" // /* MW 6 */
+ 4421 "00000000" // /* MW 5 */
+ 4422 "00001011" // /* MW 4 */
+ 4423 "00000000" // /* MW 3 */
+ 4424 "10000110" // /* MW 2 */
+ 4425 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 40
+.src_ref 3 "pad_3d.h" 496 29 first
+ 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4427 "11111111" // /* MW 5 */
+ 4428 "10000111" // /* MW 4 */
+ 4429 "00110010" // /* MW 3 */
+ 4430 "01100010" // /* MW 2 */
+ 4431 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 21 first
+ 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4433 "01101101" // /* MW 3 */
+ 4434 "01100010" // /* MW 2 */
+ 4435 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 495 58
+.src_ref 3 "pad_3d.h" 498 23 first
+ 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4437 "00010000" // /* MW 5 */
+ 4438 "00010001" // /* MW 4 */
+ 4439 "00110010" // /* MW 3 */
+ 4440 "01001110" // /* MW 2 */
+ 4441 "00000100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 45 first
+ 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4443 "00001111" // /* MW 3 */
+ 4444 "11100001" // /* MW 2 */
+ 4445 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10 first
+ 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4447 "01101101" // /* MW 3 */
+ 4448 "01001100" // /* MW 2 */
+ 4449 "00010100" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 498 10
+.src_ref 3 "pad_3d.h" 499 52 first
+ 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4451 "00110010" // /* MW 5 */
+ 4452 "11000011" // /* MW 4 */
+ 4453 "11010100" // /* MW 3 */
+ 4454 "10000101" // /* MW 2 */
+ 4455 "10000001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 26
+ 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4457 "01101001" // /* MW 3 */
+ 4458 "00001110" // /* MW 2 */
+ 4459 "00010110" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */
+ 4461 "00000001" // /* MW 5 */
+ 4462 "01000000" // /* MW 4 */
+ 4463 "00001000" // /* MW 3 */
+ 4464 "00001001" // /* MW 2 */
+ 4465 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4475 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4477 "00010000" // /* MW 9 */
+ 4478 "00000000" // /* MW 8 */
+ 4479 "01111001" // /* MW 7 */
+ 4480 "00000100" // /* MW 6 */
+ 4481 "00000000" // /* MW 5 */
+ 4482 "00000000" // /* MW 4 */
+ 4483 "10000000" // /* MW 3 */
+ 4484 "00000111" // /* MW 2 */
+ 4485 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 499 4
+ 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4487 "00010000" // /* MW 9 */
+ 4488 "00000000" // /* MW 8 */
+ 4489 "10111001" // /* MW 7 */
+ 4490 "00000101" // /* MW 6 */
+ 4491 "00000000" // /* MW 5 */
+ 4492 "00000000" // /* MW 4 */
+ 4493 "10000000" // /* MW 3 */
+ 4494 "00000110" // /* MW 2 */
+ 4495 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 499 4
+ 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4497 "00000000" // /* MW 15 */
+ 4498 "00000000" // /* MW 14 */
+ 4499 "01111000" // /* MW 13 */
+ 4500 "10010000" // /* MW 12 */
+ 4501 "10111001" // /* MW 11 */
+ 4502 "00000010" // /* MW 10 */
+ 4503 "00000000" // /* MW 9 */
+ 4504 "00000000" // /* MW 8 */
+ 4505 "01011011" // /* MW 7 */
+ 4506 "00000001" // /* MW 6 */
+ 4507 "00100000" // /* MW 5 */
+ 4508 "00000000" // /* MW 4 */
+ 4509 "11110000" // /* MW 3 */
+ 4510 "00101100" // /* MW 2 */
+ 4511 "00000000" // /* MW 1 */
+ 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4513 "00000000" // /* MW 15 */
+ 4514 "00000000" // /* MW 14 */
+ 4515 "01111000" // /* MW 13 */
+ 4516 "10100101" // /* MW 12 */
+ 4517 "00000001" // /* MW 11 */
+ 4518 "00000000" // /* MW 10 */
+ 4519 "00000000" // /* MW 9 */
+ 4520 "00000000" // /* MW 8 */
+ 4521 "01011011" // /* MW 7 */
+ 4522 "00000001" // /* MW 6 */
+ 4523 "00100000" // /* MW 5 */
+ 4524 "00000000" // /* MW 4 */
+ 4525 "11110000" // /* MW 3 */
+ 4526 "00101100" // /* MW 2 */
+ 4527 "00000000" // /* MW 1 */
+ 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4529 "00000000" // /* MW 15 */
+ 4530 "00000000" // /* MW 14 */
+ 4531 "01111000" // /* MW 13 */
+ 4532 "10100101" // /* MW 12 */
+ 4533 "00000001" // /* MW 11 */
+ 4534 "00000000" // /* MW 10 */
+ 4535 "00000000" // /* MW 9 */
+ 4536 "00000000" // /* MW 8 */
+ 4537 "01011011" // /* MW 7 */
+ 4538 "00000001" // /* MW 6 */
+ 4539 "00100000" // /* MW 5 */
+ 4540 "00000000" // /* MW 4 */
+ 4541 "11110000" // /* MW 3 */
+ 4542 "00101100" // /* MW 2 */
+ 4543 "00000000" // /* MW 1 */
+ 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4545 "00000000" // /* MW 15 */
+ 4546 "00000000" // /* MW 14 */
+ 4547 "01111000" // /* MW 13 */
+ 4548 "10100101" // /* MW 12 */
+ 4549 "00000001" // /* MW 11 */
+ 4550 "00000000" // /* MW 10 */
+ 4551 "00000000" // /* MW 9 */
+ 4552 "00000000" // /* MW 8 */
+ 4553 "01011011" // /* MW 7 */
+ 4554 "00000001" // /* MW 6 */
+ 4555 "00100000" // /* MW 5 */
+ 4556 "00000000" // /* MW 4 */
+ 4557 "11110000" // /* MW 3 */
+ 4558 "00101100" // /* MW 2 */
+ 4559 "00000000" // /* MW 1 */
+ 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4561 "00000000" // /* MW 15 */
+ 4562 "00000000" // /* MW 14 */
+ 4563 "01111000" // /* MW 13 */
+ 4564 "10100101" // /* MW 12 */
+ 4565 "00000001" // /* MW 11 */
+ 4566 "00000000" // /* MW 10 */
+ 4567 "00000000" // /* MW 9 */
+ 4568 "00000000" // /* MW 8 */
+ 4569 "01011011" // /* MW 7 */
+ 4570 "00000001" // /* MW 6 */
+ 4571 "00100000" // /* MW 5 */
+ 4572 "00000000" // /* MW 4 */
+ 4573 "11110000" // /* MW 3 */
+ 4574 "00101100" // /* MW 2 */
+ 4575 "00000000" // /* MW 1 */
+ 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4577 "00000000" // /* MW 15 */
+ 4578 "00000000" // /* MW 14 */
+ 4579 "01111000" // /* MW 13 */
+ 4580 "10100101" // /* MW 12 */
+ 4581 "00000001" // /* MW 11 */
+ 4582 "00000000" // /* MW 10 */
+ 4583 "00000000" // /* MW 9 */
+ 4584 "00000000" // /* MW 8 */
+ 4585 "01011011" // /* MW 7 */
+ 4586 "00000001" // /* MW 6 */
+ 4587 "00100000" // /* MW 5 */
+ 4588 "00000000" // /* MW 4 */
+ 4589 "11110000" // /* MW 3 */
+ 4590 "00101100" // /* MW 2 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4593 "00000000" // /* MW 15 */
+ 4594 "00000000" // /* MW 14 */
+ 4595 "01111000" // /* MW 13 */
+ 4596 "10100101" // /* MW 12 */
+ 4597 "00000001" // /* MW 11 */
+ 4598 "00000000" // /* MW 10 */
+ 4599 "00000000" // /* MW 9 */
+ 4600 "00000000" // /* MW 8 */
+ 4601 "01011011" // /* MW 7 */
+ 4602 "00000001" // /* MW 6 */
+ 4603 "00100000" // /* MW 5 */
+ 4604 "00000000" // /* MW 4 */
+ 4605 "11110000" // /* MW 3 */
+ 4606 "00101100" // /* MW 2 */
+ 4607 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4609 "00000000" // /* MW 15 */
+ 4610 "00000000" // /* MW 14 */
+ 4611 "01111000" // /* MW 13 */
+ 4612 "10100101" // /* MW 12 */
+ 4613 "00000001" // /* MW 11 */
+ 4614 "00000000" // /* MW 10 */
+ 4615 "00000000" // /* MW 9 */
+ 4616 "00000000" // /* MW 8 */
+ 4617 "00101110" // /* MW 7 */
+ 4618 "00110000" // /* MW 6 */
+ 4619 "00100010" // /* MW 5 */
+ 4620 "00000000" // /* MW 4 */
+ 4621 "11110000" // /* MW 3 */
+ 4622 "00101100" // /* MW 2 */
+ 4623 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448
+.src_ref 3 "pad_3d.h" 514 39
+.loop_nesting 0
+ 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "11110000" // /* MW 5 */
+ 4626 "10111111" // /* MW 4 */
+ 4627 "11110011" // /* MW 3 */
+ 4628 "11111111" // /* MW 2 */
+ 4629 "01111111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 39 first
+ 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "01000100" // /* MW 3 */
+ 4632 "11001110" // /* MW 2 */
+ 4633 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 35
+ 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "01110001" // /* MW 3 */
+ 4636 "01001110" // /* MW 2 */
+ 4637 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+ 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4639 "00001111" // /* MW 3 */
+ 4640 "11001110" // /* MW 2 */
+ 4641 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 511 25 first
+ 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4643 "00101110" // /* MW 3 */
+ 4644 "00000100" // /* MW 2 */
+ 4645 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 36 first
+ 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4647 "01000001" // /* MW 3 */
+ 4648 "01001000" // /* MW 2 */
+ 4649 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 30 first
+ 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4651 "00001111" // /* MW 3 */
+ 4652 "10000100" // /* MW 2 */
+ 4653 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 28 first
+ 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4655 "00001111" // /* MW 3 */
+ 4656 "00000000" // /* MW 2 */
+ 4657 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 514 55
+.src_ref 3 "pad_3d.h" 517 39 first
+ 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4659 "00000101" // /* MW 5 */
+ 4660 "00100000" // /* MW 4 */
+ 4661 "11110011" // /* MW 3 */
+ 4662 "01000101" // /* MW 2 */
+ 4663 "00001000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21 first
+ 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4665 "01101101" // /* MW 3 */
+ 4666 "00000000" // /* MW 2 */
+ 4667 "00010000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 509 21
+.src_ref 3 "pad_3d.h" 517 22 first
+ 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4669 "00000010" // /* MW 5 */
+ 4670 "11000011" // /* MW 4 */
+ 4671 "00110100" // /* MW 3 */
+ 4672 "00000011" // /* MW 2 */
+ 4673 "11000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4675 "00000001" // /* MW 5 */
+ 4676 "01000000" // /* MW 4 */
+ 4677 "01110000" // /* MW 3 */
+ 4678 "00001001" // /* MW 2 */
+ 4679 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4681 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4683 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55 first
+.delay_slot
+ 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4685 "01101101" // /* MW 3 */
+ 4686 "11001000" // /* MW 2 */
+ 4687 "00010001" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 514 55
+.delay_slot
+ 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4689 "00001000" // /* MW 3 */
+ 4690 "00000010" // /* MW 2 */
+ 4691 "00011000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 515 37 first
+.delay_slot
+ 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4693 "01111111" // /* MW 3 */
+ 4694 "01000001" // /* MW 2 */
+ 4695 "00011000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4 first
+ 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4697 "00010000" // /* MW 9 */
+ 4698 "01101000" // /* MW 8 */
+ 4699 "01111001" // /* MW 7 */
+ 4700 "00000100" // /* MW 6 */
+ 4701 "00000000" // /* MW 5 */
+ 4702 "00000000" // /* MW 4 */
+ 4703 "10000000" // /* MW 3 */
+ 4704 "00000011" // /* MW 2 */
+ 4705 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 3 "pad_3d.h" 517 4
+ 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4707 "00010000" // /* MW 9 */
+ 4708 "01101000" // /* MW 8 */
+ 4709 "10111001" // /* MW 7 */
+ 4710 "00000101" // /* MW 6 */
+ 4711 "00000000" // /* MW 5 */
+ 4712 "00000000" // /* MW 4 */
+ 4713 "10000000" // /* MW 3 */
+ 4714 "00000010" // /* MW 2 */
+ 4715 "00000010" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 517 4
+ 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4717 "10100000" // /* MW 3 */
+ 4718 "01110000" // /* MW 2 */
+ 4719 "00011101" // /* MW 1 */
+ 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4721 "00000000" // /* MW 15 */
+ 4722 "00000000" // /* MW 14 */
+ 4723 "01111000" // /* MW 13 */
+ 4724 "10100101" // /* MW 12 */
+ 4725 "00000001" // /* MW 11 */
+ 4726 "00000000" // /* MW 10 */
+ 4727 "00000000" // /* MW 9 */
+ 4728 "00000000" // /* MW 8 */
+ 4729 "01011011" // /* MW 7 */
+ 4730 "00000001" // /* MW 6 */
+ 4731 "00100000" // /* MW 5 */
+ 4732 "00000000" // /* MW 4 */
+ 4733 "11110000" // /* MW 3 */
+ 4734 "00101100" // /* MW 2 */
+ 4735 "00000000" // /* MW 1 */
+ 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4737 "00000000" // /* MW 15 */
+ 4738 "00000000" // /* MW 14 */
+ 4739 "01111000" // /* MW 13 */
+ 4740 "10100101" // /* MW 12 */
+ 4741 "00000001" // /* MW 11 */
+ 4742 "00000000" // /* MW 10 */
+ 4743 "00000000" // /* MW 9 */
+ 4744 "00000000" // /* MW 8 */
+ 4745 "01011011" // /* MW 7 */
+ 4746 "00000001" // /* MW 6 */
+ 4747 "00100000" // /* MW 5 */
+ 4748 "00000000" // /* MW 4 */
+ 4749 "11110000" // /* MW 3 */
+ 4750 "00101100" // /* MW 2 */
+ 4751 "00000000" // /* MW 1 */
+ 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4753 "00000000" // /* MW 15 */
+ 4754 "00000000" // /* MW 14 */
+ 4755 "01111000" // /* MW 13 */
+ 4756 "10100101" // /* MW 12 */
+ 4757 "00000001" // /* MW 11 */
+ 4758 "00000000" // /* MW 10 */
+ 4759 "00000000" // /* MW 9 */
+ 4760 "00000000" // /* MW 8 */
+ 4761 "01011011" // /* MW 7 */
+ 4762 "00000001" // /* MW 6 */
+ 4763 "00100000" // /* MW 5 */
+ 4764 "00000000" // /* MW 4 */
+ 4765 "11110000" // /* MW 3 */
+ 4766 "00101100" // /* MW 2 */
+ 4767 "00000000" // /* MW 1 */
+ 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4769 "00000000" // /* MW 15 */
+ 4770 "00000000" // /* MW 14 */
+ 4771 "01111000" // /* MW 13 */
+ 4772 "10100101" // /* MW 12 */
+ 4773 "00000001" // /* MW 11 */
+ 4774 "00000000" // /* MW 10 */
+ 4775 "00000000" // /* MW 9 */
+ 4776 "00000000" // /* MW 8 */
+ 4777 "01011011" // /* MW 7 */
+ 4778 "00000001" // /* MW 6 */
+ 4779 "00100000" // /* MW 5 */
+ 4780 "00000000" // /* MW 4 */
+ 4781 "11110000" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+ 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4785 "00000000" // /* MW 15 */
+ 4786 "00000000" // /* MW 14 */
+ 4787 "01111000" // /* MW 13 */
+ 4788 "10100101" // /* MW 12 */
+ 4789 "00000001" // /* MW 11 */
+ 4790 "00000000" // /* MW 10 */
+ 4791 "00000000" // /* MW 9 */
+ 4792 "00000000" // /* MW 8 */
+ 4793 "01011011" // /* MW 7 */
+ 4794 "00000001" // /* MW 6 */
+ 4795 "00100000" // /* MW 5 */
+ 4796 "00000000" // /* MW 4 */
+ 4797 "11110000" // /* MW 3 */
+ 4798 "00101100" // /* MW 2 */
+ 4799 "00000000" // /* MW 1 */
+ 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4801 "00000000" // /* MW 15 */
+ 4802 "00000000" // /* MW 14 */
+ 4803 "01111000" // /* MW 13 */
+ 4804 "10100101" // /* MW 12 */
+ 4805 "00000001" // /* MW 11 */
+ 4806 "00000000" // /* MW 10 */
+ 4807 "00000000" // /* MW 9 */
+ 4808 "00000000" // /* MW 8 */
+ 4809 "01011011" // /* MW 7 */
+ 4810 "00000001" // /* MW 6 */
+ 4811 "00100000" // /* MW 5 */
+ 4812 "00000000" // /* MW 4 */
+ 4813 "11110000" // /* MW 3 */
+ 4814 "00101100" // /* MW 2 */
+ 4815 "00000000" // /* MW 1 */
+.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4817 "00000000" // /* MW 15 */
+ 4818 "00000000" // /* MW 14 */
+ 4819 "01111000" // /* MW 13 */
+ 4820 "10100101" // /* MW 12 */
+ 4821 "00000001" // /* MW 11 */
+ 4822 "00000000" // /* MW 10 */
+ 4823 "00000000" // /* MW 9 */
+ 4824 "00000000" // /* MW 8 */
+ 4825 "00101110" // /* MW 7 */
+ 4826 "00010000" // /* MW 6 */
+ 4827 "00100010" // /* MW 5 */
+ 4828 "00000000" // /* MW 4 */
+ 4829 "11110000" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656
+.src_ref 3 "pad_3d.h" 282 first
+.loop_nesting 0
+ 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4833 "00000000" // /* MW 3 */
+ 4834 "00101000" // /* MW 2 */
+ 4835 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4837 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4839 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4841 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4843 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end
+.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0
+ 4845 "00000000" // /* MW 1 */
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30
+.src_ref 2 "reduce_base_c8.h" 362 first
+.src_ref 2 "reduce_base_c8.h" 365 18
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+.function_start
+ 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4849 "11000000" // /* MW 3 */
+ 4850 "11010100" // /* MW 2 */
+ 4851 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 365 18 first
+ 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4853 "00000000" // /* MW 7 */
+ 4854 "11001011" // /* MW 6 */
+ 4855 "10110000" // /* MW 5 */
+ 4856 "00000011" // /* MW 4 */
+ 4857 "01100000" // /* MW 3 */
+ 4858 "10010001" // /* MW 2 */
+ 4859 "01101011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 19 first
+ 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4861 "00011010" // /* MW 3 */
+ 4862 "10001100" // /* MW 2 */
+ 4863 "00000111" // /* MW 1 */
+ 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4865 "00000000" // /* MW 1 */
+ 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4867 "00000000" // /* MW 1 */
+ 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4869 "00000000" // /* MW 1 */
+ 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4871 "00000000" // /* MW 1 */
+ 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4873 "00000000" // /* MW 1 */
+ 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4875 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 367 12
+.src_ref 2 "reduce_base_c8.h" 367 19
+ 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4877 "00000001" // /* MW 5 */
+ 4878 "01000000" // /* MW 4 */
+ 4879 "11110000" // /* MW 3 */
+ 4880 "00001001" // /* MW 2 */
+ 4881 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18
+.src_ref 5 "broadcast.hpp" 80 25
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 2 "reduce_base_c8.h" 372 34
+.delay_slot
+ 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4883 "00000001" // /* MW 3 */
+ 4884 "00100000" // /* MW 2 */
+ 4885 "00010000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.delay_slot
+ 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4887 "01110010" // /* MW 3 */
+ 4888 "11000010" // /* MW 2 */
+ 4889 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4891 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 362
+.delay_slot
+ 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4895 "00000001" // /* MW 5 */
+ 4896 "00000000" // /* MW 4 */
+ 4897 "00000000" // /* MW 3 */
+ 4898 "00100000" // /* MW 2 */
+ 4899 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43
+ 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4901 "01001000" // /* MW 3 */
+ 4902 "10000000" // /* MW 2 */
+ 4903 "00011010" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 43 first
+ 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00110110" // /* MW 3 */
+ 4906 "01000000" // /* MW 2 */
+ 4907 "00000010" // /* MW 1 */
+ 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4909 "00000000" // /* MW 1 */
+ 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4911 "00000000" // /* MW 1 */
+ 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4913 "00000000" // /* MW 1 */
+ 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4915 "00000000" // /* MW 1 */
+ 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4917 "00000000" // /* MW 1 */
+ 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4919 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 34
+ 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4921 "00011001" // /* MW 3 */
+ 4922 "00000100" // /* MW 2 */
+ 4923 "00010100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */
+ 4925 "00000001" // /* MW 5 */
+ 4926 "01000000" // /* MW 4 */
+ 4927 "11110000" // /* MW 3 */
+ 4928 "00001001" // /* MW 2 */
+ 4929 "00010000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 374 29
+.delay_slot
+ 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4931 "10010010" // /* MW 3 */
+ 4932 "00000010" // /* MW 2 */
+ 4933 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4935 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4941 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 2 "reduce_base_c8.h" 372 12
+.src_ref 2 "reduce_base_c8.h" 374 29
+ 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4943 "00010000" // /* MW 11 */
+ 4944 "11101000" // /* MW 10 */
+ 4945 "01111001" // /* MW 9 */
+ 4946 "00000100" // /* MW 8 */
+ 4947 "00000000" // /* MW 7 */
+ 4948 "00000000" // /* MW 6 */
+ 4949 "10001011" // /* MW 5 */
+ 4950 "10000100" // /* MW 4 */
+ 4951 "11110011" // /* MW 3 */
+ 4952 "00101100" // /* MW 2 */
+ 4953 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4955 "10100000" // /* MW 5 */
+ 4956 "11100111" // /* MW 4 */
+ 4957 "00010110" // /* MW 3 */
+ 4958 "00000000" // /* MW 2 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 372 12
+ 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4961 "00000000" // /* MW 15 */
+ 4962 "00000000" // /* MW 14 */
+ 4963 "01111000" // /* MW 13 */
+ 4964 "01010000" // /* MW 12 */
+ 4965 "10111000" // /* MW 11 */
+ 4966 "00000010" // /* MW 10 */
+ 4967 "00000000" // /* MW 9 */
+ 4968 "00000000" // /* MW 8 */
+ 4969 "01011011" // /* MW 7 */
+ 4970 "00000001" // /* MW 6 */
+ 4971 "00100000" // /* MW 5 */
+ 4972 "00000000" // /* MW 4 */
+ 4973 "11110000" // /* MW 3 */
+ 4974 "00101100" // /* MW 2 */
+ 4975 "00000000" // /* MW 1 */
+ 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4977 "00000000" // /* MW 15 */
+ 4978 "00000000" // /* MW 14 */
+ 4979 "01111000" // /* MW 13 */
+ 4980 "10100101" // /* MW 12 */
+ 4981 "00000001" // /* MW 11 */
+ 4982 "00000000" // /* MW 10 */
+ 4983 "00000000" // /* MW 9 */
+ 4984 "00000000" // /* MW 8 */
+ 4985 "01011011" // /* MW 7 */
+ 4986 "00000001" // /* MW 6 */
+ 4987 "00100000" // /* MW 5 */
+ 4988 "00000000" // /* MW 4 */
+ 4989 "11110000" // /* MW 3 */
+ 4990 "00101100" // /* MW 2 */
+ 4991 "00000000" // /* MW 1 */
+ 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4993 "00000000" // /* MW 15 */
+ 4994 "00000000" // /* MW 14 */
+ 4995 "01111000" // /* MW 13 */
+ 4996 "10100101" // /* MW 12 */
+ 4997 "00000001" // /* MW 11 */
+ 4998 "00000000" // /* MW 10 */
+ 4999 "00000000" // /* MW 9 */
+ 5000 "00000000" // /* MW 8 */
+ 5001 "01011011" // /* MW 7 */
+ 5002 "00000001" // /* MW 6 */
+ 5003 "00100000" // /* MW 5 */
+ 5004 "00000000" // /* MW 4 */
+ 5005 "11110000" // /* MW 3 */
+ 5006 "00101100" // /* MW 2 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5009 "00000000" // /* MW 15 */
+ 5010 "00000000" // /* MW 14 */
+ 5011 "01111000" // /* MW 13 */
+ 5012 "10100101" // /* MW 12 */
+ 5013 "00000001" // /* MW 11 */
+ 5014 "00000000" // /* MW 10 */
+ 5015 "00000000" // /* MW 9 */
+ 5016 "00000000" // /* MW 8 */
+ 5017 "01011011" // /* MW 7 */
+ 5018 "00000001" // /* MW 6 */
+ 5019 "00100000" // /* MW 5 */
+ 5020 "00000000" // /* MW 4 */
+ 5021 "11110000" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+ 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5025 "00000000" // /* MW 15 */
+ 5026 "00000000" // /* MW 14 */
+ 5027 "01111000" // /* MW 13 */
+ 5028 "10100101" // /* MW 12 */
+ 5029 "00000001" // /* MW 11 */
+ 5030 "00000000" // /* MW 10 */
+ 5031 "00000000" // /* MW 9 */
+ 5032 "00000000" // /* MW 8 */
+ 5033 "01011011" // /* MW 7 */
+ 5034 "00000001" // /* MW 6 */
+ 5035 "00100000" // /* MW 5 */
+ 5036 "00000000" // /* MW 4 */
+ 5037 "11110000" // /* MW 3 */
+ 5038 "00101100" // /* MW 2 */
+ 5039 "00000000" // /* MW 1 */
+ 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5041 "00000000" // /* MW 15 */
+ 5042 "00000000" // /* MW 14 */
+ 5043 "01111000" // /* MW 13 */
+ 5044 "10100101" // /* MW 12 */
+ 5045 "00000001" // /* MW 11 */
+ 5046 "00000000" // /* MW 10 */
+ 5047 "00000000" // /* MW 9 */
+ 5048 "00000000" // /* MW 8 */
+ 5049 "01011011" // /* MW 7 */
+ 5050 "00000001" // /* MW 6 */
+ 5051 "00100000" // /* MW 5 */
+ 5052 "00000000" // /* MW 4 */
+ 5053 "11110000" // /* MW 3 */
+ 5054 "00101100" // /* MW 2 */
+ 5055 "00000000" // /* MW 1 */
+ 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5057 "00000000" // /* MW 15 */
+ 5058 "00000000" // /* MW 14 */
+ 5059 "01111000" // /* MW 13 */
+ 5060 "10100101" // /* MW 12 */
+ 5061 "00000001" // /* MW 11 */
+ 5062 "00000000" // /* MW 10 */
+ 5063 "00000000" // /* MW 9 */
+ 5064 "00000000" // /* MW 8 */
+ 5065 "01011011" // /* MW 7 */
+ 5066 "00000001" // /* MW 6 */
+ 5067 "00100000" // /* MW 5 */
+ 5068 "00000000" // /* MW 4 */
+ 5069 "11110000" // /* MW 3 */
+ 5070 "00101100" // /* MW 2 */
+ 5071 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 2 "reduce_base_c8.h" 374 29 first
+.begin_of_loop
+.end_of_loop
+.loop_nesting 1
+ 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5073 "00000000" // /* MW 15 */
+ 5074 "00000000" // /* MW 14 */
+ 5075 "01111000" // /* MW 13 */
+ 5076 "10100101" // /* MW 12 */
+ 5077 "00000001" // /* MW 11 */
+ 5078 "00000000" // /* MW 10 */
+ 5079 "00000000" // /* MW 9 */
+ 5080 "10000000" // /* MW 8 */
+ 5081 "00000110" // /* MW 7 */
+ 5082 "00011101" // /* MW 6 */
+ 5083 "00100011" // /* MW 5 */
+ 5084 "00000000" // /* MW 4 */
+ 5085 "11110000" // /* MW 3 */
+ 5086 "00101100" // /* MW 2 */
+ 5087 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.loop_nesting 0
+ 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5089 "01000000" // /* MW 3 */
+ 5090 "00000000" // /* MW 2 */
+ 5091 "00011100" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+ 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5093 "00111010" // /* MW 3 */
+ 5094 "10001010" // /* MW 2 */
+ 5095 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 388 28
+ 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5097 "00011001" // /* MW 5 */
+ 5098 "00011111" // /* MW 4 */
+ 5099 "01011010" // /* MW 3 */
+ 5100 "11011010" // /* MW 2 */
+ 5101 "11111111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 570 33
+ 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5103 "10111001" // /* MW 5 */
+ 5104 "00000000" // /* MW 4 */
+ 5105 "01010001" // /* MW 3 */
+ 5106 "01101011" // /* MW 2 */
+ 5107 "11110101" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5109 "10000001" // /* MW 5 */
+ 5110 "10111101" // /* MW 4 */
+ 5111 "01011001" // /* MW 3 */
+ 5112 "01010010" // /* MW 2 */
+ 5113 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 570 33
+.src_ref 2 "reduce_base_c8.h" 594 43 first
+ 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5115 "00111000" // /* MW 5 */
+ 5116 "11010011" // /* MW 4 */
+ 5117 "01010110" // /* MW 3 */
+ 5118 "01001110" // /* MW 2 */
+ 5119 "11100000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 43
+ 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5121 "10110010" // /* MW 3 */
+ 5122 "11011110" // /* MW 2 */
+ 5123 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 594 64
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5125 "10011010" // /* MW 3 */
+ 5126 "11111111" // /* MW 2 */
+ 5127 "00000011" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 56 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5129 "00010010" // /* MW 5 */
+ 5130 "00011100" // /* MW 4 */
+ 5131 "01010000" // /* MW 3 */
+ 5132 "11000110" // /* MW 2 */
+ 5133 "01100111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 596 56 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5135 "01011000" // /* MW 9 */
+ 5136 "00000000" // /* MW 8 */
+ 5137 "01100000" // /* MW 7 */
+ 5138 "11001010" // /* MW 6 */
+ 5139 "00100111" // /* MW 5 */
+ 5140 "00111111" // /* MW 4 */
+ 5141 "01010000" // /* MW 3 */
+ 5142 "11001010" // /* MW 2 */
+ 5143 "01111110" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 388 28 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 33 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5145 "01011000" // /* MW 11 */
+ 5146 "00000001" // /* MW 10 */
+ 5147 "11001000" // /* MW 9 */
+ 5148 "01101100" // /* MW 8 */
+ 5149 "00101001" // /* MW 7 */
+ 5150 "00100011" // /* MW 6 */
+ 5151 "01001011" // /* MW 5 */
+ 5152 "00010000" // /* MW 4 */
+ 5153 "01010010" // /* MW 3 */
+ 5154 "00011110" // /* MW 2 */
+ 5155 "11100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 388 28
+.src_ref 2 "reduce_base_c8.h" 595 75 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5157 "01011000" // /* MW 11 */
+ 5158 "00111100" // /* MW 10 */
+ 5159 "01001000" // /* MW 9 */
+ 5160 "11101100" // /* MW 8 */
+ 5161 "01110011" // /* MW 7 */
+ 5162 "00101100" // /* MW 6 */
+ 5163 "00001011" // /* MW 5 */
+ 5164 "01011010" // /* MW 4 */
+ 5165 "01010010" // /* MW 3 */
+ 5166 "11101111" // /* MW 2 */
+ 5167 "01100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5169 "01111000" // /* MW 11 */
+ 5170 "11010000" // /* MW 10 */
+ 5171 "00000001" // /* MW 9 */
+ 5172 "01101101" // /* MW 8 */
+ 5173 "01000011" // /* MW 7 */
+ 5174 "00101001" // /* MW 6 */
+ 5175 "10001011" // /* MW 5 */
+ 5176 "10000100" // /* MW 4 */
+ 5177 "10000011" // /* MW 3 */
+ 5178 "00001010" // /* MW 2 */
+ 5179 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5181 "01111000" // /* MW 9 */
+ 5182 "00010000" // /* MW 8 */
+ 5183 "10000101" // /* MW 7 */
+ 5184 "01101110" // /* MW 6 */
+ 5185 "00110011" // /* MW 5 */
+ 5186 "00100111" // /* MW 4 */
+ 5187 "10110000" // /* MW 3 */
+ 5188 "00010010" // /* MW 2 */
+ 5189 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5191 "01111000" // /* MW 9 */
+ 5192 "11010000" // /* MW 8 */
+ 5193 "00000100" // /* MW 7 */
+ 5194 "01101111" // /* MW 6 */
+ 5195 "00110011" // /* MW 5 */
+ 5196 "00101011" // /* MW 4 */
+ 5197 "00110000" // /* MW 3 */
+ 5198 "01000001" // /* MW 2 */
+ 5199 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5201 "11111110" // /* MW 5 */
+ 5202 "11110010" // /* MW 4 */
+ 5203 "10111010" // /* MW 3 */
+ 5204 "01001101" // /* MW 2 */
+ 5205 "10001100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5207 "01000001" // /* MW 5 */
+ 5208 "00010001" // /* MW 4 */
+ 5209 "10110001" // /* MW 3 */
+ 5210 "01001101" // /* MW 2 */
+ 5211 "10010100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5213 "01111000" // /* MW 11 */
+ 5214 "11010000" // /* MW 10 */
+ 5215 "00000100" // /* MW 9 */
+ 5216 "01101100" // /* MW 8 */
+ 5217 "01100011" // /* MW 7 */
+ 5218 "00001110" // /* MW 6 */
+ 5219 "01001011" // /* MW 5 */
+ 5220 "00010000" // /* MW 4 */
+ 5221 "00110000" // /* MW 3 */
+ 5222 "00000001" // /* MW 2 */
+ 5223 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24
+ 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5225 "00111101" // /* MW 9 */
+ 5226 "00110000" // /* MW 8 */
+ 5227 "00010100" // /* MW 7 */
+ 5228 "11100100" // /* MW 6 */
+ 5229 "00100000" // /* MW 5 */
+ 5230 "00000011" // /* MW 4 */
+ 5231 "01100111" // /* MW 3 */
+ 5232 "10000001" // /* MW 2 */
+ 5233 "00001011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 6 "aie_core.h" 90 15
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+ 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5235 "01110010" // /* MW 9 */
+ 5236 "01010000" // /* MW 8 */
+ 5237 "01000100" // /* MW 7 */
+ 5238 "00000010" // /* MW 6 */
+ 5239 "00001011" // /* MW 5 */
+ 5240 "01011011" // /* MW 4 */
+ 5241 "00110100" // /* MW 3 */
+ 5242 "00100001" // /* MW 2 */
+ 5243 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+ 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5245 "00000001" // /* MW 5 */
+ 5246 "10010011" // /* MW 4 */
+ 5247 "00110011" // /* MW 3 */
+ 5248 "00110001" // /* MW 2 */
+ 5249 "00000011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5251 "00111101" // /* MW 7 */
+ 5252 "10000000" // /* MW 6 */
+ 5253 "00010001" // /* MW 5 */
+ 5254 "00000100" // /* MW 4 */
+ 5255 "00110000" // /* MW 3 */
+ 5256 "01000001" // /* MW 2 */
+ 5257 "00010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5259 "10010101" // /* MW 3 */
+ 5260 "01010000" // /* MW 2 */
+ 5261 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5265 "00111101" // /* MW 9 */
+ 5266 "00101000" // /* MW 8 */
+ 5267 "00010000" // /* MW 7 */
+ 5268 "00000010" // /* MW 6 */
+ 5269 "01001100" // /* MW 5 */
+ 5270 "10001111" // /* MW 4 */
+ 5271 "00000000" // /* MW 3 */
+ 5272 "00000000" // /* MW 2 */
+ 5273 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 412 41
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5275 "00000001" // /* MW 5 */
+ 5276 "00010000" // /* MW 4 */
+ 5277 "00110111" // /* MW 3 */
+ 5278 "00000001" // /* MW 2 */
+ 5279 "00011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5281 "10000001" // /* MW 15 */
+ 5282 "10100001" // /* MW 14 */
+ 5283 "01111000" // /* MW 13 */
+ 5284 "00000000" // /* MW 12 */
+ 5285 "10000010" // /* MW 11 */
+ 5286 "00001000" // /* MW 10 */
+ 5287 "01000100" // /* MW 9 */
+ 5288 "00000000" // /* MW 8 */
+ 5289 "10001011" // /* MW 7 */
+ 5290 "10000100" // /* MW 6 */
+ 5291 "00100100" // /* MW 5 */
+ 5292 "00000000" // /* MW 4 */
+ 5293 "10000000" // /* MW 3 */
+ 5294 "00000110" // /* MW 2 */
+ 5295 "00001000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 153 115
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 391 8 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5297 "01100001" // /* MW 15 */
+ 5298 "10010000" // /* MW 14 */
+ 5299 "00010000" // /* MW 13 */
+ 5300 "10010000" // /* MW 12 */
+ 5301 "10111010" // /* MW 11 */
+ 5302 "00000101" // /* MW 10 */
+ 5303 "00000000" // /* MW 9 */
+ 5304 "00000000" // /* MW 8 */
+ 5305 "00001011" // /* MW 7 */
+ 5306 "01011010" // /* MW 6 */
+ 5307 "00100001" // /* MW 5 */
+ 5308 "00000000" // /* MW 4 */
+ 5309 "00110000" // /* MW 3 */
+ 5310 "00100001" // /* MW 2 */
+ 5311 "00011101" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5313 "10001001" // /* MW 3 */
+ 5314 "00011001" // /* MW 2 */
+ 5315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5317 "00111101" // /* MW 11 */
+ 5318 "10000000" // /* MW 10 */
+ 5319 "00010001" // /* MW 9 */
+ 5320 "10001110" // /* MW 8 */
+ 5321 "10101101" // /* MW 7 */
+ 5322 "00000000" // /* MW 6 */
+ 5323 "00100000" // /* MW 5 */
+ 5324 "00000000" // /* MW 4 */
+ 5325 "10110000" // /* MW 3 */
+ 5326 "00010010" // /* MW 2 */
+ 5327 "01101010" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 198 120
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5329 "00000000" // /* MW 15 */
+ 5330 "00000000" // /* MW 14 */
+ 5331 "01111000" // /* MW 13 */
+ 5332 "10100101" // /* MW 12 */
+ 5333 "00000001" // /* MW 11 */
+ 5334 "00000000" // /* MW 10 */
+ 5335 "00000000" // /* MW 9 */
+ 5336 "00000000" // /* MW 8 */
+ 5337 "01011011" // /* MW 7 */
+ 5338 "00000001" // /* MW 6 */
+ 5339 "00100000" // /* MW 5 */
+ 5340 "00000000" // /* MW 4 */
+ 5341 "00110000" // /* MW 3 */
+ 5342 "01000001" // /* MW 2 */
+ 5343 "00010101" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5345 "00000000" // /* MW 15 */
+ 5346 "00000000" // /* MW 14 */
+ 5347 "01111000" // /* MW 13 */
+ 5348 "10100101" // /* MW 12 */
+ 5349 "00000001" // /* MW 11 */
+ 5350 "00000000" // /* MW 10 */
+ 5351 "00000000" // /* MW 9 */
+ 5352 "00000000" // /* MW 8 */
+ 5353 "01011011" // /* MW 7 */
+ 5354 "00000001" // /* MW 6 */
+ 5355 "00100000" // /* MW 5 */
+ 5356 "00000000" // /* MW 4 */
+ 5357 "11110000" // /* MW 3 */
+ 5358 "00101100" // /* MW 2 */
+ 5359 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5361 "01000001" // /* MW 15 */
+ 5362 "10000001" // /* MW 14 */
+ 5363 "01111000" // /* MW 13 */
+ 5364 "10100101" // /* MW 12 */
+ 5365 "00000001" // /* MW 11 */
+ 5366 "00000000" // /* MW 10 */
+ 5367 "00000000" // /* MW 9 */
+ 5368 "00000000" // /* MW 8 */
+ 5369 "01011011" // /* MW 7 */
+ 5370 "00000001" // /* MW 6 */
+ 5371 "00100000" // /* MW 5 */
+ 5372 "00000000" // /* MW 4 */
+ 5373 "11110000" // /* MW 3 */
+ 5374 "00101100" // /* MW 2 */
+ 5375 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5377 "00000000" // /* MW 15 */
+ 5378 "00000000" // /* MW 14 */
+ 5379 "01111000" // /* MW 13 */
+ 5380 "10100101" // /* MW 12 */
+ 5381 "00000001" // /* MW 11 */
+ 5382 "00000000" // /* MW 10 */
+ 5383 "00000000" // /* MW 9 */
+ 5384 "10000000" // /* MW 8 */
+ 5385 "00000110" // /* MW 7 */
+ 5386 "00110001" // /* MW 6 */
+ 5387 "00100100" // /* MW 5 */
+ 5388 "00000000" // /* MW 4 */
+ 5389 "00110000" // /* MW 3 */
+ 5390 "00000001" // /* MW 2 */
+ 5391 "00011001" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5393 "10000001" // /* MW 15 */
+ 5394 "10100001" // /* MW 14 */
+ 5395 "01111000" // /* MW 13 */
+ 5396 "10100101" // /* MW 12 */
+ 5397 "00000001" // /* MW 11 */
+ 5398 "00000000" // /* MW 10 */
+ 5399 "00000000" // /* MW 9 */
+ 5400 "00000000" // /* MW 8 */
+ 5401 "01011011" // /* MW 7 */
+ 5402 "00000001" // /* MW 6 */
+ 5403 "00100000" // /* MW 5 */
+ 5404 "00000000" // /* MW 4 */
+ 5405 "11110000" // /* MW 3 */
+ 5406 "00101100" // /* MW 2 */
+ 5407 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92
+.src_ref 2 "reduce_base_c8.h" 570 24 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5409 "01100001" // /* MW 15 */
+ 5410 "10010000" // /* MW 14 */
+ 5411 "01111000" // /* MW 13 */
+ 5412 "10100101" // /* MW 12 */
+ 5413 "00000001" // /* MW 11 */
+ 5414 "00000000" // /* MW 10 */
+ 5415 "00000000" // /* MW 9 */
+ 5416 "00000000" // /* MW 8 */
+ 5417 "01011011" // /* MW 7 */
+ 5418 "00000001" // /* MW 6 */
+ 5419 "00100000" // /* MW 5 */
+ 5420 "00000000" // /* MW 4 */
+ 5421 "00110000" // /* MW 3 */
+ 5422 "00100001" // /* MW 2 */
+ 5423 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 107 23
+.src_ref 2 "reduce_base_c8.h" 412 41 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5425 "00010000" // /* MW 9 */
+ 5426 "11000000" // /* MW 8 */
+ 5427 "10101111" // /* MW 7 */
+ 5428 "00001100" // /* MW 6 */
+ 5429 "00000000" // /* MW 5 */
+ 5430 "00000000" // /* MW 4 */
+ 5431 "01010000" // /* MW 3 */
+ 5432 "00000111" // /* MW 2 */
+ 5433 "11101100" // /* MW 1 */
+.src_ref 6 "aie_core.h" 90 15 first
+.src_ref 6 "me_vmult_float_emulated.h" 107 23 first
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 943 89 first
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5435 "00111101" // /* MW 9 */
+ 5436 "10000000" // /* MW 8 */
+ 5437 "00010001" // /* MW 7 */
+ 5438 "11100010" // /* MW 6 */
+ 5439 "01110010" // /* MW 5 */
+ 5440 "00010101" // /* MW 4 */
+ 5441 "00110010" // /* MW 3 */
+ 5442 "00110001" // /* MW 2 */
+ 5443 "00000011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 101 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5445 "01110010" // /* MW 3 */
+ 5446 "01000001" // /* MW 2 */
+ 5447 "00011000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5449 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5451 "00111101" // /* MW 3 */
+ 5452 "00101000" // /* MW 2 */
+ 5453 "00010000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5455 "00000110" // /* MW 3 */
+ 5456 "00110001" // /* MW 2 */
+ 5457 "00001100" // /* MW 1 */
+ 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5459 "00000000" // /* MW 1 */
+.src_ref 7 "add_accum.hpp" 19 92 first
+.src_ref 2 "reduce_base_c8.h" 412 52 first
+ 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5461 "00111101" // /* MW 7 */
+ 5462 "00001100" // /* MW 6 */
+ 5463 "00010010" // /* MW 5 */
+ 5464 "11111001" // /* MW 4 */
+ 5465 "01011111" // /* MW 3 */
+ 5466 "00000010" // /* MW 2 */
+ 5467 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 31
+ 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5469 "00001000" // /* MW 3 */
+ 5470 "01000000" // /* MW 2 */
+ 5471 "00010001" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 412 16
+ 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */
+ 5473 "00000001" // /* MW 5 */
+ 5474 "01000000" // /* MW 4 */
+ 5475 "01110000" // /* MW 3 */
+ 5476 "00001100" // /* MW 2 */
+ 5477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5483 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 73 15 first
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 153 115 first
+.delay_slot
+ 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5485 "00000110" // /* MW 3 */
+ 5486 "00110001" // /* MW 2 */
+ 5487 "00001100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5489 "00000000" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5491 "00011010" // /* MW 5 */
+ 5492 "00010100" // /* MW 4 */
+ 5493 "11010000" // /* MW 3 */
+ 5494 "10011010" // /* MW 2 */
+ 5495 "01000110" // /* MW 1 */
+ 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5497 "00000000" // /* MW 1 */
+ 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5499 "00000000" // /* MW 1 */
+ 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5501 "00000000" // /* MW 1 */
+ 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5503 "00000000" // /* MW 1 */
+ 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5505 "00000000" // /* MW 1 */
+ 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5507 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5509 "01101001" // /* MW 3 */
+ 5510 "01001110" // /* MW 2 */
+ 5511 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */
+ 5513 "00000001" // /* MW 5 */
+ 5514 "01000000" // /* MW 4 */
+ 5515 "01000000" // /* MW 3 */
+ 5516 "00001110" // /* MW 2 */
+ 5517 "00111000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5519 "00010001" // /* MW 3 */
+ 5520 "00000000" // /* MW 2 */
+ 5521 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5529 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5531 "00001000" // /* MW 3 */
+ 5532 "10001010" // /* MW 2 */
+ 5533 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */
+ 5535 "00000001" // /* MW 5 */
+ 5536 "01000000" // /* MW 4 */
+ 5537 "10111000" // /* MW 3 */
+ 5538 "00001100" // /* MW 2 */
+ 5539 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5549 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5551 "11000001" // /* MW 5 */
+ 5552 "10000011" // /* MW 4 */
+ 5553 "10101001" // /* MW 3 */
+ 5554 "01000000" // /* MW 2 */
+ 5555 "00100100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5557 "11111110" // /* MW 5 */
+ 5558 "10111111" // /* MW 4 */
+ 5559 "11111010" // /* MW 3 */
+ 5560 "00000000" // /* MW 2 */
+ 5561 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+ 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5563 "00000010" // /* MW 5 */
+ 5564 "01010000" // /* MW 4 */
+ 5565 "11110000" // /* MW 3 */
+ 5566 "00101100" // /* MW 2 */
+ 5567 "00000000" // /* MW 1 */
+.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 6 "me_vmult_float_emulated.h" 117 42
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.src_ref 6 "me_vmult_float_emulated.h" 118 9
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.src_ref 6 "me_vmult_float_emulated.h" 119 9
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.src_ref 6 "me_vmult_float_emulated.h" 120 9
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.src_ref 6 "me_vmult_float_emulated.h" 121 9
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.src_ref 6 "me_vmult_float_emulated.h" 122 9
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 123 9
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 9
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9
+.src_ref 5 "add.hpp" 28 49
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 5 "add_reduce.hpp" 324 44
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5569 "01011000" // /* MW 11 */
+ 5570 "00111100" // /* MW 10 */
+ 5571 "01001000" // /* MW 9 */
+ 5572 "00001000" // /* MW 8 */
+ 5573 "01010010" // /* MW 7 */
+ 5574 "00000000" // /* MW 6 */
+ 5575 "00001011" // /* MW 5 */
+ 5576 "10000011" // /* MW 4 */
+ 5577 "10000010" // /* MW 3 */
+ 5578 "00001010" // /* MW 2 */
+ 5579 "00001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+ 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5581 "00000010" // /* MW 5 */
+ 5582 "00010001" // /* MW 4 */
+ 5583 "01010000" // /* MW 3 */
+ 5584 "00011010" // /* MW 2 */
+ 5585 "01001000" // /* MW 1 */
+ 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5587 "00000000" // /* MW 1 */
+ 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5589 "00000000" // /* MW 1 */
+ 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5591 "00000000" // /* MW 1 */
+ 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5593 "00000000" // /* MW 1 */
+ 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5595 "00000000" // /* MW 1 */
+ 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5597 "01100111" // /* MW 3 */
+ 5598 "00000001" // /* MW 2 */
+ 5599 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+ 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5601 "00000000" // /* MW 15 */
+ 5602 "00000000" // /* MW 14 */
+ 5603 "01111000" // /* MW 13 */
+ 5604 "10100101" // /* MW 12 */
+ 5605 "00000001" // /* MW 11 */
+ 5606 "11110100" // /* MW 10 */
+ 5607 "01010010" // /* MW 9 */
+ 5608 "00001100" // /* MW 8 */
+ 5609 "01011011" // /* MW 7 */
+ 5610 "00000001" // /* MW 6 */
+ 5611 "00100000" // /* MW 5 */
+ 5612 "00000000" // /* MW 4 */
+ 5613 "11110000" // /* MW 3 */
+ 5614 "00101100" // /* MW 2 */
+ 5615 "00000000" // /* MW 1 */
+.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35
+ 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5617 "00010000" // /* MW 11 */
+ 5618 "01111000" // /* MW 10 */
+ 5619 "10110010" // /* MW 9 */
+ 5620 "11110011" // /* MW 8 */
+ 5621 "00000001" // /* MW 7 */
+ 5622 "10000000" // /* MW 6 */
+ 5623 "10100101" // /* MW 5 */
+ 5624 "11111101" // /* MW 4 */
+ 5625 "10000111" // /* MW 3 */
+ 5626 "10001010" // /* MW 2 */
+ 5627 "00000100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16
+ 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5629 "01111000" // /* MW 11 */
+ 5630 "00111001" // /* MW 10 */
+ 5631 "10001011" // /* MW 9 */
+ 5632 "00001000" // /* MW 8 */
+ 5633 "01010000" // /* MW 7 */
+ 5634 "10000000" // /* MW 6 */
+ 5635 "01100101" // /* MW 5 */
+ 5636 "11111010" // /* MW 4 */
+ 5637 "01010111" // /* MW 3 */
+ 5638 "11011100" // /* MW 2 */
+ 5639 "11100000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1289 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+ 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5641 "01111000" // /* MW 11 */
+ 5642 "01001001" // /* MW 10 */
+ 5643 "00000010" // /* MW 9 */
+ 5644 "11101000" // /* MW 8 */
+ 5645 "01100111" // /* MW 7 */
+ 5646 "00111111" // /* MW 6 */
+ 5647 "10001011" // /* MW 5 */
+ 5648 "10000100" // /* MW 4 */
+ 5649 "11010111" // /* MW 3 */
+ 5650 "00011010" // /* MW 2 */
+ 5651 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "vector.hpp" 1280 49
+ 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5653 "01111000" // /* MW 9 */
+ 5654 "01001001" // /* MW 8 */
+ 5655 "00000010" // /* MW 7 */
+ 5656 "00000001" // /* MW 6 */
+ 5657 "11010010" // /* MW 5 */
+ 5658 "00000010" // /* MW 4 */
+ 5659 "00000000" // /* MW 3 */
+ 5660 "11111000" // /* MW 2 */
+ 5661 "00000011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first
+ 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5663 "00010000" // /* MW 9 */
+ 5664 "01000000" // /* MW 8 */
+ 5665 "01111011" // /* MW 7 */
+ 5666 "00000100" // /* MW 6 */
+ 5667 "00000000" // /* MW 5 */
+ 5668 "00000000" // /* MW 4 */
+ 5669 "00000000" // /* MW 3 */
+ 5670 "00011001" // /* MW 2 */
+ 5671 "00000010" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+ 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00010000" // /* MW 9 */
+ 5674 "01100000" // /* MW 8 */
+ 5675 "10111100" // /* MW 7 */
+ 5676 "00000101" // /* MW 6 */
+ 5677 "00000000" // /* MW 5 */
+ 5678 "00000000" // /* MW 4 */
+ 5679 "10110000" // /* MW 3 */
+ 5680 "10010100" // /* MW 2 */
+ 5681 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98
+ 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5683 "00000001" // /* MW 3 */
+ 5684 "01110100" // /* MW 2 */
+ 5685 "00010000" // /* MW 1 */
+ 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5687 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1286 72
+.src_ref 7 "accum.hpp" 1108 103
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5689 "10000000" // /* MW 3 */
+ 5690 "11111010" // /* MW 2 */
+ 5691 "00010101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+.src_ref 3 "reduce_mean_c8_impl.h" 223 9
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5693 "00000000" // /* MW 7 */
+ 5694 "10000000" // /* MW 6 */
+ 5695 "10111001" // /* MW 5 */
+ 5696 "00000010" // /* MW 4 */
+ 5697 "11000000" // /* MW 3 */
+ 5698 "00000010" // /* MW 2 */
+ 5699 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5701 "10010010" // /* MW 3 */
+ 5702 "10100000" // /* MW 2 */
+ 5703 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5705 "10000011" // /* MW 7 */
+ 5706 "01000000" // /* MW 6 */
+ 5707 "00010000" // /* MW 5 */
+ 5708 "11100110" // /* MW 4 */
+ 5709 "10010010" // /* MW 3 */
+ 5710 "10100110" // /* MW 2 */
+ 5711 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5713 "10010010" // /* MW 3 */
+ 5714 "00101010" // /* MW 2 */
+ 5715 "00011011" // /* MW 1 */
+ 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5717 "00000000" // /* MW 1 */
+ 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5719 "00000000" // /* MW 1 */
+ 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5721 "00000000" // /* MW 1 */
+ 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5723 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5725 "00010110" // /* MW 3 */
+ 5726 "11000000" // /* MW 2 */
+ 5727 "00001001" // /* MW 1 */
+ 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5729 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5731 "10000011" // /* MW 3 */
+ 5732 "00000110" // /* MW 2 */
+ 5733 "00010000" // /* MW 1 */
+ 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5735 "00000000" // /* MW 1 */
+ 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5737 "00000000" // /* MW 1 */
+ 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5739 "00000000" // /* MW 1 */
+ 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5741 "00000000" // /* MW 1 */
+ 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5743 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+ 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00000000" // /* MW 15 */
+ 5746 "00000000" // /* MW 14 */
+ 5747 "01111000" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00001000" // /* MW 10 */
+ 5751 "01110001" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "00010110" // /* MW 7 */
+ 5754 "11000000" // /* MW 6 */
+ 5755 "00100010" // /* MW 5 */
+ 5756 "00000000" // /* MW 4 */
+ 5757 "11110000" // /* MW 3 */
+ 5758 "00101100" // /* MW 2 */
+ 5759 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first
+.begin_of_loop
+.loop_nesting 1
+ 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5761 "00100101" // /* MW 5 */
+ 5762 "10100101" // /* MW 4 */
+ 5763 "10001001" // /* MW 3 */
+ 5764 "10111110" // /* MW 2 */
+ 5765 "00100011" // /* MW 1 */
+ 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5767 "10010010" // /* MW 3 */
+ 5768 "11010110" // /* MW 2 */
+ 5769 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49
+ 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5771 "11000000" // /* MW 3 */
+ 5772 "00011110" // /* MW 2 */
+ 5773 "00011111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1280 49 first
+ 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5775 "10000100" // /* MW 3 */
+ 5776 "00111011" // /* MW 2 */
+ 5777 "00010111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1285 72 first
+ 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5779 "11100000" // /* MW 5 */
+ 5780 "00111101" // /* MW 4 */
+ 5781 "01011110" // /* MW 3 */
+ 5782 "11001001" // /* MW 2 */
+ 5783 "11101110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72
+ 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5785 "11011101" // /* MW 3 */
+ 5786 "10111101" // /* MW 2 */
+ 5787 "00010101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 98 first
+ 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5789 "11010001" // /* MW 3 */
+ 5790 "10111111" // /* MW 2 */
+ 5791 "00010110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "add_reduce.hpp" 322 47 first
+ 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5793 "11001101" // /* MW 5 */
+ 5794 "01110000" // /* MW 4 */
+ 5795 "01001000" // /* MW 3 */
+ 5796 "10111100" // /* MW 2 */
+ 5797 "00101111" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+ 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5799 "10010010" // /* MW 3 */
+ 5800 "00010000" // /* MW 2 */
+ 5801 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 7 "accum.hpp" 198 120
+ 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5803 "00100010" // /* MW 3 */
+ 5804 "01001110" // /* MW 2 */
+ 5805 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 7 "accum.hpp" 198 120 first
+ 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5807 "00100010" // /* MW 3 */
+ 5808 "01001111" // /* MW 2 */
+ 5809 "00011101" // /* MW 1 */
+ 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5811 "10010010" // /* MW 3 */
+ 5812 "10010000" // /* MW 2 */
+ 5813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5815 "10010010" // /* MW 3 */
+ 5816 "10010100" // /* MW 2 */
+ 5817 "00011011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5819 "00111101" // /* MW 7 */
+ 5820 "00101000" // /* MW 6 */
+ 5821 "00010011" // /* MW 5 */
+ 5822 "11100110" // /* MW 4 */
+ 5823 "10001010" // /* MW 3 */
+ 5824 "00010010" // /* MW 2 */
+ 5825 "00000010" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5827 "10001010" // /* MW 3 */
+ 5828 "00001110" // /* MW 2 */
+ 5829 "00011001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5831 "00111101" // /* MW 7 */
+ 5832 "01010000" // /* MW 6 */
+ 5833 "00010010" // /* MW 5 */
+ 5834 "11100110" // /* MW 4 */
+ 5835 "00100010" // /* MW 3 */
+ 5836 "01001110" // /* MW 2 */
+ 5837 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5839 "10010010" // /* MW 3 */
+ 5840 "00001110" // /* MW 2 */
+ 5841 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5843 "01100110" // /* MW 3 */
+ 5844 "11000000" // /* MW 2 */
+ 5845 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5847 "00111101" // /* MW 7 */
+ 5848 "00110000" // /* MW 6 */
+ 5849 "00010100" // /* MW 5 */
+ 5850 "11100110" // /* MW 4 */
+ 5851 "10010010" // /* MW 3 */
+ 5852 "00010000" // /* MW 2 */
+ 5853 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5855 "10010010" // /* MW 3 */
+ 5856 "00010010" // /* MW 2 */
+ 5857 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 151 136 first
+ 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5859 "00010010" // /* MW 3 */
+ 5860 "00101100" // /* MW 2 */
+ 5861 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 243 115 first
+.src_ref 7 "accum.hpp" 151 115
+ 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5863 "00100010" // /* MW 3 */
+ 5864 "11010001" // /* MW 2 */
+ 5865 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5867 "01100110" // /* MW 3 */
+ 5868 "01001000" // /* MW 2 */
+ 5869 "00011100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5871 "00111101" // /* MW 7 */
+ 5872 "01100100" // /* MW 6 */
+ 5873 "00010001" // /* MW 5 */
+ 5874 "11100110" // /* MW 4 */
+ 5875 "10010010" // /* MW 3 */
+ 5876 "00010000" // /* MW 2 */
+ 5877 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5879 "10010010" // /* MW 3 */
+ 5880 "00010010" // /* MW 2 */
+ 5881 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22
+ 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5883 "00010010" // /* MW 3 */
+ 5884 "00101000" // /* MW 2 */
+ 5885 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5887 "00011110" // /* MW 3 */
+ 5888 "01000000" // /* MW 2 */
+ 5889 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5891 "00111101" // /* MW 7 */
+ 5892 "01001100" // /* MW 6 */
+ 5893 "00010010" // /* MW 5 */
+ 5894 "11100110" // /* MW 4 */
+ 5895 "00010010" // /* MW 3 */
+ 5896 "00110000" // /* MW 2 */
+ 5897 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5899 "10010010" // /* MW 3 */
+ 5900 "00010100" // /* MW 2 */
+ 5901 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5903 "00111101" // /* MW 7 */
+ 5904 "10001100" // /* MW 6 */
+ 5905 "00010011" // /* MW 5 */
+ 5906 "11000110" // /* MW 4 */
+ 5907 "00011110" // /* MW 3 */
+ 5908 "01000000" // /* MW 2 */
+ 5909 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5911 "10010010" // /* MW 3 */
+ 5912 "00010000" // /* MW 2 */
+ 5913 "00011011" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5915 "00010010" // /* MW 3 */
+ 5916 "00100100" // /* MW 2 */
+ 5917 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5919 "00111101" // /* MW 7 */
+ 5920 "00110000" // /* MW 6 */
+ 5921 "00010001" // /* MW 5 */
+ 5922 "11000110" // /* MW 4 */
+ 5923 "00011110" // /* MW 3 */
+ 5924 "01000000" // /* MW 2 */
+ 5925 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5927 "10010010" // /* MW 3 */
+ 5928 "00010000" // /* MW 2 */
+ 5929 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5931 "00010010" // /* MW 3 */
+ 5932 "00101000" // /* MW 2 */
+ 5933 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5935 "00111101" // /* MW 7 */
+ 5936 "01010000" // /* MW 6 */
+ 5937 "00010010" // /* MW 5 */
+ 5938 "11000110" // /* MW 4 */
+ 5939 "00000010" // /* MW 3 */
+ 5940 "01000000" // /* MW 2 */
+ 5941 "00000100" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5943 "10010010" // /* MW 3 */
+ 5944 "00010000" // /* MW 2 */
+ 5945 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5947 "00010010" // /* MW 3 */
+ 5948 "00101100" // /* MW 2 */
+ 5949 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5951 "00000010" // /* MW 3 */
+ 5952 "01000000" // /* MW 2 */
+ 5953 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5955 "00111101" // /* MW 7 */
+ 5956 "01110000" // /* MW 6 */
+ 5957 "00010011" // /* MW 5 */
+ 5958 "11100110" // /* MW 4 */
+ 5959 "00010010" // /* MW 3 */
+ 5960 "00100100" // /* MW 2 */
+ 5961 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.nohwbrkpt
+.noswbrkpt
+ 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5963 "10010010" // /* MW 3 */
+ 5964 "00010000" // /* MW 2 */
+ 5965 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5967 "00111101" // /* MW 7 */
+ 5968 "00110000" // /* MW 6 */
+ 5969 "00010000" // /* MW 5 */
+ 5970 "11000110" // /* MW 4 */
+ 5971 "00000010" // /* MW 3 */
+ 5972 "01010000" // /* MW 2 */
+ 5973 "00000101" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5975 "10010010" // /* MW 3 */
+ 5976 "00010100" // /* MW 2 */
+ 5977 "00011100" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5979 "00010010" // /* MW 3 */
+ 5980 "00101000" // /* MW 2 */
+ 5981 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5983 "00000001" // /* MW 3 */
+ 5984 "11100010" // /* MW 2 */
+ 5985 "00011101" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5987 "00010010" // /* MW 3 */
+ 5988 "00101100" // /* MW 2 */
+ 5989 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1288 16 first
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5991 "00000011" // /* MW 5 */
+ 5992 "01010100" // /* MW 4 */
+ 5993 "10000011" // /* MW 3 */
+ 5994 "11010000" // /* MW 2 */
+ 5995 "11100010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5997 "00010010" // /* MW 3 */
+ 5998 "10100000" // /* MW 2 */
+ 5999 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+.src_ref 5 "vector.hpp" 1287 41 first
+.src_ref 5 "broadcast.hpp" 80 25 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6001 "00000110" // /* MW 5 */
+ 6002 "10110100" // /* MW 4 */
+ 6003 "10001010" // /* MW 3 */
+ 6004 "11010100" // /* MW 2 */
+ 6005 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6007 "10100000" // /* MW 3 */
+ 6008 "11010100" // /* MW 2 */
+ 6009 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6011 "11010001" // /* MW 3 */
+ 6012 "00010000" // /* MW 2 */
+ 6013 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 853 46
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6015 "11110001" // /* MW 3 */
+ 6016 "00010010" // /* MW 2 */
+ 6017 "00011100" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95 first
+.src_ref 5 "vector.hpp" 1413 19 first
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "00100010" // /* MW 3 */
+ 6020 "11010011" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.nohwbrkpt
+.noswbrkpt
+ 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "00100010" // /* MW 3 */
+ 6024 "10010011" // /* MW 2 */
+ 6025 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 142 95
+.src_ref 5 "vector.hpp" 1413 19
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6027 "00100010" // /* MW 3 */
+ 6028 "00010101" // /* MW 2 */
+ 6029 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6031 "00000000" // /* MW 3 */
+ 6032 "01011100" // /* MW 2 */
+ 6033 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6035 "00001000" // /* MW 3 */
+ 6036 "00001100" // /* MW 2 */
+ 6037 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+ 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6039 "10101000" // /* MW 3 */
+ 6040 "11000011" // /* MW 2 */
+ 6041 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6043 "10010010" // /* MW 3 */
+ 6044 "00001110" // /* MW 2 */
+ 6045 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6047 "10010010" // /* MW 3 */
+ 6048 "10101100" // /* MW 2 */
+ 6049 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6051 "01110000" // /* MW 7 */
+ 6052 "01001001" // /* MW 6 */
+ 6053 "00000111" // /* MW 5 */
+ 6054 "00000001" // /* MW 4 */
+ 6055 "11000000" // /* MW 3 */
+ 6056 "00000010" // /* MW 2 */
+ 6057 "01101000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6059 "10010010" // /* MW 3 */
+ 6060 "00110010" // /* MW 2 */
+ 6061 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+ 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6063 "10000011" // /* MW 9 */
+ 6064 "01001100" // /* MW 8 */
+ 6065 "00010010" // /* MW 7 */
+ 6066 "00001111" // /* MW 6 */
+ 6067 "11101010" // /* MW 5 */
+ 6068 "11101101" // /* MW 4 */
+ 6069 "11001101" // /* MW 3 */
+ 6070 "10111011" // /* MW 2 */
+ 6071 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.src_ref 5 "vector.hpp" 1285 72
+.src_ref 5 "vector.hpp" 1289 16 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id first
+ 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "10100001" // /* MW 7 */
+ 6074 "11101100" // /* MW 6 */
+ 6075 "00010001" // /* MW 5 */
+ 6076 "10010001" // /* MW 4 */
+ 6077 "00111110" // /* MW 3 */
+ 6078 "00001011" // /* MW 2 */
+ 6079 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+.src_ref 5 "vector.hpp" 1285 72 first
+.src_ref 5 "vector.hpp" 1289 16
+.aggressive_scheduled_block_id 12
+.noswbrkpt
+ 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6081 "01100001" // /* MW 9 */
+ 6082 "11101100" // /* MW 8 */
+ 6083 "00010000" // /* MW 7 */
+ 6084 "00101111" // /* MW 6 */
+ 6085 "00001001" // /* MW 5 */
+ 6086 "00110011" // /* MW 4 */
+ 6087 "11100010" // /* MW 3 */
+ 6088 "10100101" // /* MW 2 */
+ 6089 "00000101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 12
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6091 "00000001" // /* MW 3 */
+ 6092 "11101100" // /* MW 2 */
+ 6093 "00010011" // /* MW 1 */
+ 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6095 "00000000" // /* MW 1 */
+ 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6097 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6099 "00010110" // /* MW 3 */
+ 6100 "11000001" // /* MW 2 */
+ 6101 "00001100" // /* MW 1 */
+ 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6103 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "10000011" // /* MW 3 */
+ 6106 "01010010" // /* MW 2 */
+ 6107 "00010010" // /* MW 1 */
+ 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6109 "00000000" // /* MW 1 */
+ 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6111 "00000000" // /* MW 1 */
+ 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6113 "00000000" // /* MW 1 */
+ 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6115 "00000000" // /* MW 1 */
+ 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6119 "00010110" // /* MW 3 */
+ 6120 "01000001" // /* MW 2 */
+ 6121 "00001100" // /* MW 1 */
+ 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6125 "10100001" // /* MW 3 */
+ 6126 "11110000" // /* MW 2 */
+ 6127 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6129 "01100001" // /* MW 3 */
+ 6130 "11110000" // /* MW 2 */
+ 6131 "00010010" // /* MW 1 */
+ 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6133 "00000000" // /* MW 1 */
+ 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6135 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id first
+ 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6137 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 13
+.noswbrkpt
+ 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6139 "10100001" // /* MW 3 */
+ 6140 "11110010" // /* MW 2 */
+ 6141 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6143 "00010010" // /* MW 3 */
+ 6144 "01110000" // /* MW 2 */
+ 6145 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 13
+.nohwbrkpt
+.noswbrkpt
+ 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6147 "00111101" // /* MW 3 */
+ 6148 "10001000" // /* MW 2 */
+ 6149 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 13
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6151 "10010010" // /* MW 3 */
+ 6152 "00000101" // /* MW 2 */
+ 6153 "00011100" // /* MW 1 */
+ 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6155 "00000000" // /* MW 1 */
+ 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6157 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id first
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+.aggressive_scheduled_block_id 14
+.noswbrkpt
+ 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6161 "00000001" // /* MW 3 */
+ 6162 "11100001" // /* MW 2 */
+ 6163 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6165 "00010010" // /* MW 3 */
+ 6166 "01110000" // /* MW 2 */
+ 6167 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 14
+.nohwbrkpt
+.noswbrkpt
+ 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6169 "00111101" // /* MW 3 */
+ 6170 "10001000" // /* MW 2 */
+ 6171 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 14
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6173 "10010010" // /* MW 3 */
+ 6174 "00000001" // /* MW 2 */
+ 6175 "00011100" // /* MW 1 */
+ 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6177 "00000000" // /* MW 1 */
+ 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6179 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id first
+ 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6181 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 15
+.noswbrkpt
+ 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6183 "01100001" // /* MW 3 */
+ 6184 "11110010" // /* MW 2 */
+ 6185 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6187 "00010010" // /* MW 3 */
+ 6188 "01110000" // /* MW 2 */
+ 6189 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 15
+.nohwbrkpt
+.noswbrkpt
+ 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6191 "00111101" // /* MW 3 */
+ 6192 "10000100" // /* MW 2 */
+ 6193 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 15
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6195 "10010010" // /* MW 3 */
+ 6196 "00000101" // /* MW 2 */
+ 6197 "00011100" // /* MW 1 */
+ 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6199 "00000000" // /* MW 1 */
+ 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6201 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id first
+ 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6203 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+.aggressive_scheduled_block_id 16
+.noswbrkpt
+ 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6205 "00000001" // /* MW 3 */
+ 6206 "11110010" // /* MW 2 */
+ 6207 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6209 "00010010" // /* MW 3 */
+ 6210 "01110000" // /* MW 2 */
+ 6211 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 16
+.nohwbrkpt
+.noswbrkpt
+ 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6213 "00111101" // /* MW 3 */
+ 6214 "10000100" // /* MW 2 */
+ 6215 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 16
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6217 "10010010" // /* MW 3 */
+ 6218 "00000001" // /* MW 2 */
+ 6219 "00011100" // /* MW 1 */
+ 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6221 "00000000" // /* MW 1 */
+ 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6223 "00000000" // /* MW 1 */
+ 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6225 "00000000" // /* MW 1 */
+ 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6227 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id first
+ 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6229 "00010010" // /* MW 3 */
+ 6230 "01110000" // /* MW 2 */
+ 6231 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 17
+.noswbrkpt
+ 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6233 "00111101" // /* MW 3 */
+ 6234 "10001000" // /* MW 2 */
+ 6235 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 17
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6237 "10010010" // /* MW 3 */
+ 6238 "00010101" // /* MW 2 */
+ 6239 "00011100" // /* MW 1 */
+ 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6241 "00000000" // /* MW 1 */
+ 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6243 "00000000" // /* MW 1 */
+ 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6245 "00000000" // /* MW 1 */
+ 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6247 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id first
+ 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6249 "00010010" // /* MW 3 */
+ 6250 "01101000" // /* MW 2 */
+ 6251 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 18
+.noswbrkpt
+ 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6253 "00111101" // /* MW 3 */
+ 6254 "01000100" // /* MW 2 */
+ 6255 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 18
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6257 "10010010" // /* MW 3 */
+ 6258 "00010001" // /* MW 2 */
+ 6259 "00011010" // /* MW 1 */
+ 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6261 "00000000" // /* MW 1 */
+ 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6263 "00000000" // /* MW 1 */
+ 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6265 "00000000" // /* MW 1 */
+ 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6267 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id first
+ 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "00010010" // /* MW 3 */
+ 6270 "01101000" // /* MW 2 */
+ 6271 "00011101" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 19
+.noswbrkpt
+ 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "00111101" // /* MW 3 */
+ 6274 "00100000" // /* MW 2 */
+ 6275 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 19
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "10010010" // /* MW 3 */
+ 6278 "00010101" // /* MW 2 */
+ 6279 "00011001" // /* MW 1 */
+ 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6281 "00000000" // /* MW 1 */
+ 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6283 "00000000" // /* MW 1 */
+ 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6285 "00000000" // /* MW 1 */
+ 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6287 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id first
+ 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6289 "00010010" // /* MW 3 */
+ 6290 "01100000" // /* MW 2 */
+ 6291 "00011100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.aggressive_scheduled_block_id 20
+.noswbrkpt
+ 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6293 "00111101" // /* MW 3 */
+ 6294 "00001100" // /* MW 2 */
+ 6295 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 20
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6297 "10010010" // /* MW 3 */
+ 6298 "00010001" // /* MW 2 */
+ 6299 "00011000" // /* MW 1 */
+ 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6301 "00000000" // /* MW 1 */
+ 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6303 "00000000" // /* MW 1 */
+ 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6305 "00000000" // /* MW 1 */
+ 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6307 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 72 first
+.src_ref 7 "accum.hpp" 1108 103 first
+ 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6309 "00010110" // /* MW 3 */
+ 6310 "11000000" // /* MW 2 */
+ 6311 "00001101" // /* MW 1 */
+ 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6313 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1286 41
+ 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6315 "11111110" // /* MW 3 */
+ 6316 "10000101" // /* MW 2 */
+ 6317 "00011101" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1289 16 first
+ 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6319 "11001100" // /* MW 3 */
+ 6320 "11010101" // /* MW 2 */
+ 6321 "00011101" // /* MW 1 */
+ 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6323 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98 first
+.src_ref 5 "vector.hpp" 1292 26 first
+ 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6325 "01000001" // /* MW 11 */
+ 6326 "01100101" // /* MW 10 */
+ 6327 "10001011" // /* MW 9 */
+ 6328 "00000011" // /* MW 8 */
+ 6329 "00000000" // /* MW 7 */
+ 6330 "00000000" // /* MW 6 */
+ 6331 "00100000" // /* MW 5 */
+ 6332 "00000000" // /* MW 4 */
+ 6333 "11110000" // /* MW 3 */
+ 6334 "00101100" // /* MW 2 */
+ 6335 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first
+.end_of_loop
+ 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6337 "00000000" // /* MW 15 */
+ 6338 "00000000" // /* MW 14 */
+ 6339 "01111000" // /* MW 13 */
+ 6340 "10100101" // /* MW 12 */
+ 6341 "00000001" // /* MW 11 */
+ 6342 "00000000" // /* MW 10 */
+ 6343 "00000000" // /* MW 9 */
+ 6344 "10000000" // /* MW 8 */
+ 6345 "11101010" // /* MW 7 */
+ 6346 "10001010" // /* MW 6 */
+ 6347 "00100111" // /* MW 5 */
+ 6348 "00000000" // /* MW 4 */
+ 6349 "11110000" // /* MW 3 */
+ 6350 "00101100" // /* MW 2 */
+ 6351 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 6353 "00000000" // /* MW 5 */
+ 6354 "00000000" // /* MW 4 */
+ 6355 "01111000" // /* MW 3 */
+ 6356 "00001100" // /* MW 2 */
+ 6357 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6365 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6367 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520
+ 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "10000000" // /* MW 5 */
+ 6370 "10110100" // /* MW 4 */
+ 6371 "10110000" // /* MW 3 */
+ 6372 "10110100" // /* MW 2 */
+ 6373 "11111111" // /* MW 1 */
+ 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6375 "00000000" // /* MW 9 */
+ 6376 "00000000" // /* MW 8 */
+ 6377 "00000000" // /* MW 7 */
+ 6378 "10000000" // /* MW 6 */
+ 6379 "00111101" // /* MW 5 */
+ 6380 "11111000" // /* MW 4 */
+ 6381 "11110111" // /* MW 3 */
+ 6382 "00101100" // /* MW 2 */
+ 6383 "00000000" // /* MW 1 */
+.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+ 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6385 "10010001" // /* MW 3 */
+ 6386 "01100001" // /* MW 2 */
+ 6387 "00011111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+ 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6389 "11000001" // /* MW 5 */
+ 6390 "01100100" // /* MW 4 */
+ 6391 "01011011" // /* MW 3 */
+ 6392 "10001111" // /* MW 2 */
+ 6393 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id first
+ 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6395 "01100000" // /* MW 3 */
+ 6396 "01111011" // /* MW 2 */
+ 6397 "00011000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.aggressive_scheduled_block_id 21
+.noswbrkpt
+ 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6399 "01110111" // /* MW 3 */
+ 6400 "00000100" // /* MW 2 */
+ 6401 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 57 first
+.aggressive_scheduled_block_id 21
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 6403 "00000001" // /* MW 5 */
+ 6404 "00000000" // /* MW 4 */
+ 6405 "11111000" // /* MW 3 */
+ 6406 "00010011" // /* MW 2 */
+ 6407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 21
+.nohwbrkpt
+.noswbrkpt
+ 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6413 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30 first
+.delay_slot
+.aggressive_scheduled_block_id 21
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6415 "00000111" // /* MW 3 */
+ 6416 "11000110" // /* MW 2 */
+ 6417 "00010000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 352 30
+.delay_slot
+ 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6419 "01100000" // /* MW 13 */
+ 6420 "00101011" // /* MW 12 */
+ 6421 "00000000" // /* MW 11 */
+ 6422 "10101111" // /* MW 10 */
+ 6423 "00110100" // /* MW 9 */
+ 6424 "00000000" // /* MW 8 */
+ 6425 "10110000" // /* MW 7 */
+ 6426 "11000000" // /* MW 6 */
+ 6427 "00100000" // /* MW 5 */
+ 6428 "00000000" // /* MW 4 */
+ 6429 "11110000" // /* MW 3 */
+ 6430 "00101100" // /* MW 2 */
+ 6431 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+.return_address
+ 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6433 "00111001" // /* MW 3 */
+ 6434 "11111000" // /* MW 2 */
+ 6435 "00000111" // /* MW 1 */
+ 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6437 "10011001" // /* MW 3 */
+ 6438 "11111100" // /* MW 2 */
+ 6439 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 353 23 first
+ 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6441 "01110111" // /* MW 3 */
+ 6442 "01010100" // /* MW 2 */
+ 6443 "00000111" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4 first
+ 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6445 "00000001" // /* MW 5 */
+ 6446 "00000000" // /* MW 4 */
+ 6447 "00000000" // /* MW 3 */
+ 6448 "11100000" // /* MW 2 */
+ 6449 "11111111" // /* MW 1 */
+ 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6451 "00000000" // /* MW 1 */
+ 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6453 "00000000" // /* MW 1 */
+ 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6455 "00000000" // /* MW 1 */
+.src_ref 2 "reduce_base_c8.h" 420 4
+ 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6457 "00000000" // /* MW 3 */
+ 6458 "00101000" // /* MW 2 */
+ 6459 "00010000" // /* MW 1 */
+.delay_slot
+ 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6461 "11000000" // /* MW 3 */
+ 6462 "01100010" // /* MW 2 */
+ 6463 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6471 "01111110" // /* MW 9 */
+ 6472 "10100101" // /* MW 8 */
+ 6473 "00000001" // /* MW 7 */
+ 6474 "00000000" // /* MW 6 */
+ 6475 "00010000" // /* MW 5 */
+ 6476 "00000000" // /* MW 4 */
+ 6477 "11110000" // /* MW 3 */
+ 6478 "00101100" // /* MW 2 */
+ 6479 "00000000" // /* MW 1 */
+.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 5 "blend.hpp" 163 48
+ 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 6481 "00100000" // /* MW 9 */
+ 6482 "00000000" // /* MW 8 */
+ 6483 "00000000" // /* MW 7 */
+ 6484 "10111000" // /* MW 6 */
+ 6485 "00000010" // /* MW 5 */
+ 6486 "00000000" // /* MW 4 */
+ 6487 "00000000" // /* MW 3 */
+ 6488 "11110100" // /* MW 2 */
+ 6489 "00011111" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6491 "00000001" // /* MW 3 */
+ 6492 "00101010" // /* MW 2 */
+ 6493 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6501 "10000001" // /* MW 11 */
+ 6502 "10101101" // /* MW 10 */
+ 6503 "00000000" // /* MW 9 */
+ 6504 "00000000" // /* MW 8 */
+ 6505 "00000000" // /* MW 7 */
+ 6506 "00000000" // /* MW 6 */
+ 6507 "00100000" // /* MW 5 */
+ 6508 "00000000" // /* MW 4 */
+ 6509 "11110000" // /* MW 3 */
+ 6510 "00101100" // /* MW 2 */
+ 6511 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6513 "00010101" // /* MW 3 */
+ 6514 "00001010" // /* MW 2 */
+ 6515 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6517 "01100111" // /* MW 3 */
+ 6518 "01001010" // /* MW 2 */
+ 6519 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */
+ 6521 "00000001" // /* MW 5 */
+ 6522 "01000000" // /* MW 4 */
+ 6523 "00110000" // /* MW 3 */
+ 6524 "00001110" // /* MW 2 */
+ 6525 "00101000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6527 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6535 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6537 "00011001" // /* MW 3 */
+ 6538 "00001110" // /* MW 2 */
+ 6539 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6541 "01100111" // /* MW 3 */
+ 6542 "11001110" // /* MW 2 */
+ 6543 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */
+ 6545 "00000001" // /* MW 5 */
+ 6546 "01000000" // /* MW 4 */
+ 6547 "10101000" // /* MW 3 */
+ 6548 "00001110" // /* MW 2 */
+ 6549 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6551 "01000001" // /* MW 3 */
+ 6552 "00001010" // /* MW 2 */
+ 6553 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6555 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6557 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6559 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6561 "00000000" // /* MW 15 */
+ 6562 "00000000" // /* MW 14 */
+ 6563 "01111000" // /* MW 13 */
+ 6564 "10100101" // /* MW 12 */
+ 6565 "00000001" // /* MW 11 */
+ 6566 "00000000" // /* MW 10 */
+ 6567 "00000000" // /* MW 9 */
+ 6568 "00000000" // /* MW 8 */
+ 6569 "01011011" // /* MW 7 */
+ 6570 "00000001" // /* MW 6 */
+ 6571 "00100000" // /* MW 5 */
+ 6572 "00000000" // /* MW 4 */
+ 6573 "11110000" // /* MW 3 */
+ 6574 "00101100" // /* MW 2 */
+ 6575 "00000000" // /* MW 1 */
+.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30
+ 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6577 "01111000" // /* MW 9 */
+ 6578 "11110000" // /* MW 8 */
+ 6579 "01100000" // /* MW 7 */
+ 6580 "11101010" // /* MW 6 */
+ 6581 "00010000" // /* MW 5 */
+ 6582 "00000001" // /* MW 4 */
+ 6583 "01010000" // /* MW 3 */
+ 6584 "00011110" // /* MW 2 */
+ 6585 "01001000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first
+ 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6587 "00000101" // /* MW 5 */
+ 6588 "10100100" // /* MW 4 */
+ 6589 "00011000" // /* MW 3 */
+ 6590 "10001101" // /* MW 2 */
+ 6591 "10001001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 202 12
+ 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */
+ 6593 "00000001" // /* MW 5 */
+ 6594 "01000000" // /* MW 4 */
+ 6595 "00100000" // /* MW 3 */
+ 6596 "00001110" // /* MW 2 */
+ 6597 "00110000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6605 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first
+.delay_slot
+ 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6607 "01011110" // /* MW 3 */
+ 6608 "11001010" // /* MW 2 */
+ 6609 "00010001" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22
+ 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6611 "00010000" // /* MW 11 */
+ 6612 "00001000" // /* MW 10 */
+ 6613 "01111101" // /* MW 9 */
+ 6614 "00000100" // /* MW 8 */
+ 6615 "00000000" // /* MW 7 */
+ 6616 "00000000" // /* MW 6 */
+ 6617 "10001011" // /* MW 5 */
+ 6618 "10000100" // /* MW 4 */
+ 6619 "10000000" // /* MW 3 */
+ 6620 "10001010" // /* MW 2 */
+ 6621 "00000100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9
+.src_ref 3 "reduce_mean_c8_impl.h" 206 35
+ 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6623 "00010000" // /* MW 11 */
+ 6624 "00111000" // /* MW 10 */
+ 6625 "10111101" // /* MW 9 */
+ 6626 "00000101" // /* MW 8 */
+ 6627 "00000000" // /* MW 7 */
+ 6628 "10000000" // /* MW 6 */
+ 6629 "10100101" // /* MW 5 */
+ 6630 "11111101" // /* MW 4 */
+ 6631 "11010111" // /* MW 3 */
+ 6632 "00011110" // /* MW 2 */
+ 6633 "01001000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+ 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6635 "00010101" // /* MW 3 */
+ 6636 "00011101" // /* MW 2 */
+ 6637 "00000000" // /* MW 1 */
+ 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6639 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+ 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6641 "10010010" // /* MW 3 */
+ 6642 "11000010" // /* MW 2 */
+ 6643 "00011100" // /* MW 1 */
+ 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6645 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 199 120
+.src_ref 5 "add.hpp" 28 49 first
+ 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6647 "00111101" // /* MW 7 */
+ 6648 "01101000" // /* MW 6 */
+ 6649 "00010001" // /* MW 5 */
+ 6650 "11100110" // /* MW 4 */
+ 6651 "00010010" // /* MW 3 */
+ 6652 "00010011" // /* MW 2 */
+ 6653 "00000011" // /* MW 1 */
+ 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6655 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first
+ 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6657 "00000000" // /* MW 15 */
+ 6658 "00000000" // /* MW 14 */
+ 6659 "11001000" // /* MW 13 */
+ 6660 "11111111" // /* MW 12 */
+ 6661 "10111001" // /* MW 11 */
+ 6662 "00000010" // /* MW 10 */
+ 6663 "00000000" // /* MW 9 */
+ 6664 "00000000" // /* MW 8 */
+ 6665 "01011011" // /* MW 7 */
+ 6666 "00000001" // /* MW 6 */
+ 6667 "00100000" // /* MW 5 */
+ 6668 "00000000" // /* MW 4 */
+ 6669 "11110000" // /* MW 3 */
+ 6670 "00101100" // /* MW 2 */
+ 6671 "00000000" // /* MW 1 */
+.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824
+.src_ref 5 "vector.hpp" 1139 17 first
+.src_ref 7 "accum.hpp" 199 120 first
+.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first
+.begin_of_loop
+.loop_nesting 1
+ 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6673 "00000000" // /* MW 15 */
+ 6674 "00000000" // /* MW 14 */
+ 6675 "01111000" // /* MW 13 */
+ 6676 "10100101" // /* MW 12 */
+ 6677 "00000001" // /* MW 11 */
+ 6678 "00000000" // /* MW 10 */
+ 6679 "00000000" // /* MW 9 */
+ 6680 "00000000" // /* MW 8 */
+ 6681 "01011011" // /* MW 7 */
+ 6682 "00000001" // /* MW 6 */
+ 6683 "00100000" // /* MW 5 */
+ 6684 "00000000" // /* MW 4 */
+ 6685 "10110000" // /* MW 3 */
+ 6686 "10100010" // /* MW 2 */
+ 6687 "00000011" // /* MW 1 */
+ 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6689 "00000000" // /* MW 15 */
+ 6690 "00000000" // /* MW 14 */
+ 6691 "01111000" // /* MW 13 */
+ 6692 "10100101" // /* MW 12 */
+ 6693 "00000001" // /* MW 11 */
+ 6694 "00000000" // /* MW 10 */
+ 6695 "00000000" // /* MW 9 */
+ 6696 "00000000" // /* MW 8 */
+ 6697 "01011011" // /* MW 7 */
+ 6698 "00000001" // /* MW 6 */
+ 6699 "00100000" // /* MW 5 */
+ 6700 "00000000" // /* MW 4 */
+ 6701 "11110000" // /* MW 3 */
+ 6702 "00101100" // /* MW 2 */
+ 6703 "00000000" // /* MW 1 */
+ 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6705 "00000000" // /* MW 15 */
+ 6706 "00000000" // /* MW 14 */
+ 6707 "01111000" // /* MW 13 */
+ 6708 "10100101" // /* MW 12 */
+ 6709 "00000001" // /* MW 11 */
+ 6710 "00000000" // /* MW 10 */
+ 6711 "00000000" // /* MW 9 */
+ 6712 "00000000" // /* MW 8 */
+ 6713 "01011011" // /* MW 7 */
+ 6714 "00000001" // /* MW 6 */
+ 6715 "00100000" // /* MW 5 */
+ 6716 "00000000" // /* MW 4 */
+ 6717 "11110000" // /* MW 3 */
+ 6718 "00101100" // /* MW 2 */
+ 6719 "00000000" // /* MW 1 */
+ 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6721 "00000000" // /* MW 15 */
+ 6722 "00000000" // /* MW 14 */
+ 6723 "01111000" // /* MW 13 */
+ 6724 "10100101" // /* MW 12 */
+ 6725 "00000001" // /* MW 11 */
+ 6726 "00000000" // /* MW 10 */
+ 6727 "00000000" // /* MW 9 */
+ 6728 "00000000" // /* MW 8 */
+ 6729 "01011011" // /* MW 7 */
+ 6730 "00000001" // /* MW 6 */
+ 6731 "00100000" // /* MW 5 */
+ 6732 "00000000" // /* MW 4 */
+ 6733 "11110000" // /* MW 3 */
+ 6734 "00101100" // /* MW 2 */
+ 6735 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id first
+ 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6737 "00000000" // /* MW 15 */
+ 6738 "00000000" // /* MW 14 */
+ 6739 "01111000" // /* MW 13 */
+ 6740 "00001001" // /* MW 12 */
+ 6741 "01100010" // /* MW 11 */
+ 6742 "00000010" // /* MW 10 */
+ 6743 "00000000" // /* MW 9 */
+ 6744 "00000000" // /* MW 8 */
+ 6745 "01011011" // /* MW 7 */
+ 6746 "00000001" // /* MW 6 */
+ 6747 "00100000" // /* MW 5 */
+ 6748 "00000000" // /* MW 4 */
+ 6749 "11110000" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 5 "add.hpp" 28 49 first
+.aggressive_scheduled_block_id 22
+.noswbrkpt
+ 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "01000001" // /* MW 15 */
+ 6754 "10001011" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "01011011" // /* MW 7 */
+ 6762 "00000001" // /* MW 6 */
+ 6763 "00100000" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920
+.src_ref 7 "accum.hpp" 199 120 first
+.end_of_loop
+.aggressive_scheduled_block_id 22
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6769 "00000000" // /* MW 15 */
+ 6770 "00000000" // /* MW 14 */
+ 6771 "01111000" // /* MW 13 */
+ 6772 "10001001" // /* MW 12 */
+ 6773 "10001001" // /* MW 11 */
+ 6774 "00000001" // /* MW 10 */
+ 6775 "00000000" // /* MW 9 */
+ 6776 "00000000" // /* MW 8 */
+ 6777 "01011011" // /* MW 7 */
+ 6778 "00000001" // /* MW 6 */
+ 6779 "00100000" // /* MW 5 */
+ 6780 "00000000" // /* MW 4 */
+ 6781 "11110000" // /* MW 3 */
+ 6782 "00101100" // /* MW 2 */
+ 6783 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id first
+.loop_nesting 0
+ 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6785 "00010000" // /* MW 9 */
+ 6786 "01111000" // /* MW 8 */
+ 6787 "10110010" // /* MW 7 */
+ 6788 "11110011" // /* MW 6 */
+ 6789 "00000001" // /* MW 5 */
+ 6790 "00000000" // /* MW 4 */
+ 6791 "00000000" // /* MW 3 */
+ 6792 "00010000" // /* MW 2 */
+ 6793 "00000010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 5 "vector.hpp" 915 23
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6795 "01011000" // /* MW 9 */
+ 6796 "00000001" // /* MW 8 */
+ 6797 "10011000" // /* MW 7 */
+ 6798 "00001000" // /* MW 6 */
+ 6799 "01100001" // /* MW 5 */
+ 6800 "00000000" // /* MW 4 */
+ 6801 "01010000" // /* MW 3 */
+ 6802 "10010000" // /* MW 2 */
+ 6803 "11100000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6805 "00000101" // /* MW 3 */
+ 6806 "00100010" // /* MW 2 */
+ 6807 "00010000" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6809 "00000000" // /* MW 1 */
+.src_ref 7 "accum.hpp" 150 115 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6811 "00010010" // /* MW 3 */
+ 6812 "11000100" // /* MW 2 */
+ 6813 "00011100" // /* MW 1 */
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6817 "00010010" // /* MW 3 */
+ 6818 "00110011" // /* MW 2 */
+ 6819 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.aggressive_scheduled_block_id 23
+.nohwbrkpt
+.noswbrkpt
+ 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6821 "00010010" // /* MW 3 */
+ 6822 "00010000" // /* MW 2 */
+ 6823 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "vector.hpp" 1159 33
+.src_ref 7 "accum.hpp" 198 120 first
+.src_ref 7 "accum.hpp" 1108 103
+.src_ref 5 "add_reduce.hpp" 324 44 first
+.aggressive_scheduled_block_id 23
+.noswbrkpt
+ 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6825 "00111101" // /* MW 9 */
+ 6826 "01000000" // /* MW 8 */
+ 6827 "00010000" // /* MW 7 */
+ 6828 "00101111" // /* MW 6 */
+ 6829 "01001001" // /* MW 5 */
+ 6830 "00000000" // /* MW 4 */
+ 6831 "10000000" // /* MW 3 */
+ 6832 "00111010" // /* MW 2 */
+ 6833 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120
+.aggressive_scheduled_block_id 23
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6835 "00010010" // /* MW 3 */
+ 6836 "00010011" // /* MW 2 */
+ 6837 "00011010" // /* MW 1 */
+.src_ref 5 "broadcast.hpp" 80 25 first
+ 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6839 "01110010" // /* MW 3 */
+ 6840 "00010110" // /* MW 2 */
+ 6841 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+ 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6843 "10010010" // /* MW 3 */
+ 6844 "00000100" // /* MW 2 */
+ 6845 "00011001" // /* MW 1 */
+ 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6847 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+ 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "00010010" // /* MW 3 */
+ 6850 "00000100" // /* MW 2 */
+ 6851 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id first
+ 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "00010010" // /* MW 3 */
+ 6854 "00100000" // /* MW 2 */
+ 6855 "00011001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 24
+.noswbrkpt
+ 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6857 "00111101" // /* MW 7 */
+ 6858 "00001100" // /* MW 6 */
+ 6859 "00010000" // /* MW 5 */
+ 6860 "11000110" // /* MW 4 */
+ 6861 "01000010" // /* MW 3 */
+ 6862 "00010000" // /* MW 2 */
+ 6863 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 24
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6865 "10010010" // /* MW 3 */
+ 6866 "00000100" // /* MW 2 */
+ 6867 "00011011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6869 "10010010" // /* MW 3 */
+ 6870 "00100000" // /* MW 2 */
+ 6871 "00011001" // /* MW 1 */
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6873 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 112 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 112 19 first
+ 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6875 "10010110" // /* MW 3 */
+ 6876 "01000000" // /* MW 2 */
+ 6877 "00001000" // /* MW 1 */
+ 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6879 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id first
+ 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6881 "10000011" // /* MW 7 */
+ 6882 "01000000" // /* MW 6 */
+ 6883 "00010100" // /* MW 5 */
+ 6884 "11100110" // /* MW 4 */
+ 6885 "00010010" // /* MW 3 */
+ 6886 "10100000" // /* MW 2 */
+ 6887 "00000001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 25
+.noswbrkpt
+ 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6889 "00111101" // /* MW 7 */
+ 6890 "00001000" // /* MW 6 */
+ 6891 "00010000" // /* MW 5 */
+ 6892 "11000110" // /* MW 4 */
+ 6893 "00011010" // /* MW 3 */
+ 6894 "10011000" // /* MW 2 */
+ 6895 "00000001" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 25
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6897 "10010010" // /* MW 3 */
+ 6898 "00000110" // /* MW 2 */
+ 6899 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6901 "10010010" // /* MW 3 */
+ 6902 "10100100" // /* MW 2 */
+ 6903 "00011001" // /* MW 1 */
+ 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6907 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 113 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 114 19 first
+ 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6909 "00010110" // /* MW 3 */
+ 6910 "01000010" // /* MW 2 */
+ 6911 "00001001" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id first
+ 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6913 "00010010" // /* MW 3 */
+ 6914 "10100000" // /* MW 2 */
+ 6915 "00011010" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 322 47 first
+.src_ref 5 "add_reduce.hpp" 324 44
+.aggressive_scheduled_block_id 26
+.noswbrkpt
+ 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6917 "00111101" // /* MW 7 */
+ 6918 "00001000" // /* MW 6 */
+ 6919 "00010000" // /* MW 5 */
+ 6920 "11000110" // /* MW 4 */
+ 6921 "00000010" // /* MW 3 */
+ 6922 "00101000" // /* MW 2 */
+ 6923 "00000011" // /* MW 1 */
+.src_ref 7 "accum.hpp" 198 120 first
+.aggressive_scheduled_block_id 26
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10010010" // /* MW 3 */
+ 6926 "00001100" // /* MW 2 */
+ 6927 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "10010010" // /* MW 3 */
+ 6930 "10100110" // /* MW 2 */
+ 6931 "00011010" // /* MW 1 */
+ 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6933 "00000000" // /* MW 1 */
+ 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6935 "00000000" // /* MW 1 */
+ 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6937 "00000000" // /* MW 1 */
+.src_ref 5 "add_reduce.hpp" 324 22 first
+ 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6939 "00010010" // /* MW 3 */
+ 6940 "00100000" // /* MW 2 */
+ 6941 "00011011" // /* MW 1 */
+.src_ref 5 "vector.hpp" 915 23 first
+ 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6943 "00000001" // /* MW 3 */
+ 6944 "00011010" // /* MW 2 */
+ 6945 "00011000" // /* MW 1 */
+ 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6947 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 856 23 first
+ 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6949 "00010001" // /* MW 3 */
+ 6950 "00000000" // /* MW 2 */
+ 6951 "00011011" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36 first
+ 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6953 "00001000" // /* MW 3 */
+ 6954 "10001011" // /* MW 2 */
+ 6955 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+ 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6957 "10010010" // /* MW 3 */
+ 6958 "00000010" // /* MW 2 */
+ 6959 "00011010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6961 "10010010" // /* MW 3 */
+ 6962 "10101010" // /* MW 2 */
+ 6963 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 108 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 108 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+ 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6965 "01110000" // /* MW 7 */
+ 6966 "01001001" // /* MW 6 */
+ 6967 "10010001" // /* MW 5 */
+ 6968 "00000001" // /* MW 4 */
+ 6969 "11000000" // /* MW 3 */
+ 6970 "00100010" // /* MW 2 */
+ 6971 "01011000" // /* MW 1 */
+ 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6973 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+ 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6975 "10000011" // /* MW 3 */
+ 6976 "01001010" // /* MW 2 */
+ 6977 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 9 first
+ 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6979 "01000001" // /* MW 3 */
+ 6980 "11101010" // /* MW 2 */
+ 6981 "00010000" // /* MW 1 */
+ 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6983 "00000000" // /* MW 1 */
+ 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6985 "00000000" // /* MW 1 */
+ 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6987 "00000000" // /* MW 1 */
+ 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6989 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 110 19 first
+ 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6991 "10010110" // /* MW 3 */
+ 6992 "11000000" // /* MW 2 */
+ 6993 "00001000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6995 "10000011" // /* MW 3 */
+ 6996 "10000100" // /* MW 2 */
+ 6997 "00010100" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6999 "10000011" // /* MW 3 */
+ 7000 "00100010" // /* MW 2 */
+ 7001 "00010011" // /* MW 1 */
+ 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7003 "00000000" // /* MW 1 */
+ 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7005 "00000000" // /* MW 1 */
+ 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7007 "00000000" // /* MW 1 */
+ 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7009 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 115 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 115 34 first
+ 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7011 "00010110" // /* MW 3 */
+ 7012 "11000010" // /* MW 2 */
+ 7013 "00001001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 111 6 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 19 first
+.src_ref 6 "me_vmult_float_emulated.h" 111 34 first
+ 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7015 "10010110" // /* MW 3 */
+ 7016 "01000001" // /* MW 2 */
+ 7017 "00001011" // /* MW 1 */
+ 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7019 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+ 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7021 "01100001" // /* MW 3 */
+ 7022 "11101100" // /* MW 2 */
+ 7023 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 9 first
+ 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7025 "01000001" // /* MW 3 */
+ 7026 "11101100" // /* MW 2 */
+ 7027 "00010011" // /* MW 1 */
+ 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7029 "00000000" // /* MW 1 */
+ 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7031 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id first
+ 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7033 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 9 first
+.aggressive_scheduled_block_id 27
+.noswbrkpt
+ 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7035 "01100001" // /* MW 3 */
+ 7036 "11100010" // /* MW 2 */
+ 7037 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 117 42 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "00010010" // /* MW 3 */
+ 7040 "01101000" // /* MW 2 */
+ 7041 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 27
+.nohwbrkpt
+.noswbrkpt
+ 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7043 "00111101" // /* MW 3 */
+ 7044 "01001100" // /* MW 2 */
+ 7045 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6
+.aggressive_scheduled_block_id 27
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7047 "10010010" // /* MW 3 */
+ 7048 "00000101" // /* MW 2 */
+ 7049 "00011010" // /* MW 1 */
+ 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7051 "00000000" // /* MW 1 */
+ 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7053 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id first
+ 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7055 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 9 first
+.aggressive_scheduled_block_id 28
+.noswbrkpt
+ 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7057 "01100001" // /* MW 3 */
+ 7058 "11101010" // /* MW 2 */
+ 7059 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 118 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00010010" // /* MW 3 */
+ 7062 "01101000" // /* MW 2 */
+ 7063 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 28
+.nohwbrkpt
+.noswbrkpt
+ 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7065 "00111101" // /* MW 3 */
+ 7066 "01001100" // /* MW 2 */
+ 7067 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6
+.aggressive_scheduled_block_id 28
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7069 "10010010" // /* MW 3 */
+ 7070 "00000001" // /* MW 2 */
+ 7071 "00011010" // /* MW 1 */
+ 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7073 "00000000" // /* MW 1 */
+ 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7075 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id first
+ 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7077 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 9 first
+.aggressive_scheduled_block_id 29
+.noswbrkpt
+ 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7079 "01000001" // /* MW 3 */
+ 7080 "11100010" // /* MW 2 */
+ 7081 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 119 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7083 "00010010" // /* MW 3 */
+ 7084 "01101000" // /* MW 2 */
+ 7085 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6 first
+.aggressive_scheduled_block_id 29
+.nohwbrkpt
+.noswbrkpt
+ 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7087 "00111101" // /* MW 3 */
+ 7088 "01001100" // /* MW 2 */
+ 7089 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 29
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7091 "10010010" // /* MW 3 */
+ 7092 "00000101" // /* MW 2 */
+ 7093 "00011010" // /* MW 1 */
+ 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7095 "00000000" // /* MW 1 */
+ 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7097 "00000000" // /* MW 1 */
+ 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7099 "00000000" // /* MW 1 */
+ 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7101 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 120 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id first
+ 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7103 "00010010" // /* MW 3 */
+ 7104 "01101000" // /* MW 2 */
+ 7105 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 30
+.noswbrkpt
+ 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7107 "00111101" // /* MW 3 */
+ 7108 "01001100" // /* MW 2 */
+ 7109 "00010010" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6
+.aggressive_scheduled_block_id 30
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7111 "10010010" // /* MW 3 */
+ 7112 "00000001" // /* MW 2 */
+ 7113 "00011010" // /* MW 1 */
+ 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7115 "00000000" // /* MW 1 */
+ 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7117 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 9 first
+ 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "11000001" // /* MW 3 */
+ 7120 "11100000" // /* MW 2 */
+ 7121 "00010011" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 121 6 first
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id first
+ 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7125 "00010010" // /* MW 3 */
+ 7126 "01101000" // /* MW 2 */
+ 7127 "00011001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 31
+.noswbrkpt
+ 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7129 "00111101" // /* MW 3 */
+ 7130 "01001100" // /* MW 2 */
+ 7131 "00010011" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6
+.aggressive_scheduled_block_id 31
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7133 "10010010" // /* MW 3 */
+ 7134 "00000101" // /* MW 2 */
+ 7135 "00011010" // /* MW 1 */
+ 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7137 "00000000" // /* MW 1 */
+ 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7139 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 9 first
+ 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7141 "00000001" // /* MW 3 */
+ 7142 "11100010" // /* MW 2 */
+ 7143 "00010001" // /* MW 1 */
+ 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7145 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 122 6 first
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id first
+ 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00010010" // /* MW 3 */
+ 7148 "01101100" // /* MW 2 */
+ 7149 "00011000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6 first
+.aggressive_scheduled_block_id 32
+.noswbrkpt
+ 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "00111101" // /* MW 3 */
+ 7152 "01000100" // /* MW 2 */
+ 7153 "00010001" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.aggressive_scheduled_block_id 32
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "10010010" // /* MW 3 */
+ 7156 "00000001" // /* MW 2 */
+ 7157 "00011010" // /* MW 1 */
+ 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7159 "00000000" // /* MW 1 */
+ 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7161 "00000000" // /* MW 1 */
+ 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7163 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id first
+ 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7165 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 123 6
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 33
+.noswbrkpt
+ 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7167 "00111101" // /* MW 7 */
+ 7168 "01000000" // /* MW 6 */
+ 7169 "00010000" // /* MW 5 */
+ 7170 "11100110" // /* MW 4 */
+ 7171 "00010010" // /* MW 3 */
+ 7172 "00100100" // /* MW 2 */
+ 7173 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6
+.src_ref 6 "me_vmult_float_emulated.h" 125 9 first
+.aggressive_scheduled_block_id 33
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7175 "00000001" // /* MW 7 */
+ 7176 "11101010" // /* MW 6 */
+ 7177 "00010100" // /* MW 5 */
+ 7178 "11100110" // /* MW 4 */
+ 7179 "10010010" // /* MW 3 */
+ 7180 "00000000" // /* MW 2 */
+ 7181 "00000010" // /* MW 1 */
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id first
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6
+.aggressive_scheduled_block_id 34
+.noswbrkpt
+ 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00111101" // /* MW 3 */
+ 7190 "01010000" // /* MW 2 */
+ 7191 "00010000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 124 6 first
+.aggressive_scheduled_block_id 34
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010010" // /* MW 3 */
+ 7194 "00000000" // /* MW 2 */
+ 7195 "00011010" // /* MW 1 */
+ 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7197 "00000000" // /* MW 1 */
+ 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */
+ 7199 "00000000" // /* MW 5 */
+ 7200 "00000000" // /* MW 4 */
+ 7201 "01111000" // /* MW 3 */
+ 7202 "00001100" // /* MW 2 */
+ 7203 "00000000" // /* MW 1 */
+.delay_slot
+ 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7205 "01100101" // /* MW 3 */
+ 7206 "11111010" // /* MW 2 */
+ 7207 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7209 "00000000" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 125 6 first
+.delay_slot
+ 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7211 "00010010" // /* MW 3 */
+ 7212 "00000000" // /* MW 2 */
+ 7213 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7215 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 1159 33 first
+.src_ref 7 "accum.hpp" 1108 103 first
+.delay_slot
+ 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7217 "00000000" // /* MW 15 */
+ 7218 "00000000" // /* MW 14 */
+ 7219 "01111000" // /* MW 13 */
+ 7220 "10100101" // /* MW 12 */
+ 7221 "00000001" // /* MW 11 */
+ 7222 "00000000" // /* MW 10 */
+ 7223 "00000000" // /* MW 9 */
+ 7224 "10000000" // /* MW 8 */
+ 7225 "00010010" // /* MW 7 */
+ 7226 "00000101" // /* MW 6 */
+ 7227 "00100001" // /* MW 5 */
+ 7228 "00000000" // /* MW 4 */
+ 7229 "11110000" // /* MW 3 */
+ 7230 "00101100" // /* MW 2 */
+ 7231 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384
+.src_ref 5 "blend.hpp" 163 48
+ 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */
+ 7233 "00100000" // /* MW 9 */
+ 7234 "00000000" // /* MW 8 */
+ 7235 "00000000" // /* MW 7 */
+ 7236 "10111110" // /* MW 6 */
+ 7237 "00000010" // /* MW 5 */
+ 7238 "00000000" // /* MW 4 */
+ 7239 "00000000" // /* MW 3 */
+ 7240 "00010100" // /* MW 2 */
+ 7241 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7243 "00000001" // /* MW 3 */
+ 7244 "00101010" // /* MW 2 */
+ 7245 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7247 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7253 "10000001" // /* MW 11 */
+ 7254 "10101101" // /* MW 10 */
+ 7255 "00000000" // /* MW 9 */
+ 7256 "00000000" // /* MW 8 */
+ 7257 "00000000" // /* MW 7 */
+ 7258 "00000000" // /* MW 6 */
+ 7259 "00100000" // /* MW 5 */
+ 7260 "00000000" // /* MW 4 */
+ 7261 "11110000" // /* MW 3 */
+ 7262 "00101100" // /* MW 2 */
+ 7263 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416
+ 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7265 "00000000" // /* MW 5 */
+ 7266 "00000000" // /* MW 4 */
+ 7267 "10010000" // /* MW 3 */
+ 7268 "00001110" // /* MW 2 */
+ 7269 "00000000" // /* MW 1 */
+.delay_slot
+ 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7271 "01110000" // /* MW 7 */
+ 7272 "11110000" // /* MW 6 */
+ 7273 "01100000" // /* MW 5 */
+ 7274 "00000010" // /* MW 4 */
+ 7275 "10110000" // /* MW 3 */
+ 7276 "10010011" // /* MW 2 */
+ 7277 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7279 "00000000" // /* MW 1 */
+.delay_slot
+ 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7281 "00110011" // /* MW 3 */
+ 7282 "11110000" // /* MW 2 */
+ 7283 "00001111" // /* MW 1 */
+.delay_slot
+ 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7285 "00110011" // /* MW 3 */
+ 7286 "11110101" // /* MW 2 */
+ 7287 "00001111" // /* MW 1 */
+.delay_slot
+ 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7289 "01110000" // /* MW 7 */
+ 7290 "10100101" // /* MW 6 */
+ 7291 "00000001" // /* MW 5 */
+ 7292 "00000000" // /* MW 4 */
+ 7293 "01100000" // /* MW 3 */
+ 7294 "00001110" // /* MW 2 */
+ 7295 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7297 "00000101" // /* MW 3 */
+ 7298 "00100010" // /* MW 2 */
+ 7299 "00010000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first
+ 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7301 "01100111" // /* MW 3 */
+ 7302 "01100010" // /* MW 2 */
+ 7303 "00010100" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */
+ 7305 "00000001" // /* MW 5 */
+ 7306 "01000000" // /* MW 4 */
+ 7307 "10010000" // /* MW 3 */
+ 7308 "00001110" // /* MW 2 */
+ 7309 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+.delay_slot
+ 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7313 "00110011" // /* MW 3 */
+ 7314 "11110000" // /* MW 2 */
+ 7315 "00001111" // /* MW 1 */
+.delay_slot
+ 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7317 "00110011" // /* MW 3 */
+ 7318 "11110101" // /* MW 2 */
+ 7319 "00001111" // /* MW 1 */
+.delay_slot
+ 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "01110011" // /* MW 3 */
+ 7322 "11111000" // /* MW 2 */
+ 7323 "00001111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+.delay_slot
+ 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7325 "01111001" // /* MW 9 */
+ 7326 "11110000" // /* MW 8 */
+ 7327 "01100000" // /* MW 7 */
+ 7328 "01001010" // /* MW 6 */
+ 7329 "01110000" // /* MW 5 */
+ 7330 "00000000" // /* MW 4 */
+ 7331 "10110000" // /* MW 3 */
+ 7332 "10010011" // /* MW 2 */
+ 7333 "11111111" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7335 "01100111" // /* MW 3 */
+ 7336 "11001110" // /* MW 2 */
+ 7337 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7339 "00000001" // /* MW 5 */
+ 7340 "01000000" // /* MW 4 */
+ 7341 "10000000" // /* MW 3 */
+ 7342 "00001110" // /* MW 2 */
+ 7343 "00111000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7345 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7347 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7349 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7351 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7353 "00000000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7355 "01100111" // /* MW 3 */
+ 7356 "01001110" // /* MW 2 */
+ 7357 "00010001" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 184 32
+ 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */
+ 7359 "00000001" // /* MW 5 */
+ 7360 "01000000" // /* MW 4 */
+ 7361 "01110000" // /* MW 3 */
+ 7362 "00001110" // /* MW 2 */
+ 7363 "00111000" // /* MW 1 */
+.src_ref 3 "reduce_mean_c8_impl.h" 200 49
+.delay_slot
+ 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7365 "01000001" // /* MW 3 */
+ 7366 "00001010" // /* MW 2 */
+ 7367 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7369 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7371 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7375 "00000000" // /* MW 1 */
+ 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */
+ 7377 "00000000" // /* MW 5 */
+ 7378 "00000000" // /* MW 4 */
+ 7379 "11011000" // /* MW 3 */
+ 7380 "00001100" // /* MW 2 */
+ 7381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7391 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544
+.src_ref 5 "blend.hpp" 170 36
+ 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7393 "00100000" // /* MW 9 */
+ 7394 "00000000" // /* MW 8 */
+ 7395 "00000000" // /* MW 7 */
+ 7396 "10111000" // /* MW 6 */
+ 7397 "00000010" // /* MW 5 */
+ 7398 "00000000" // /* MW 4 */
+ 7399 "00000000" // /* MW 3 */
+ 7400 "00110001" // /* MW 2 */
+ 7401 "00100000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7403 "01000001" // /* MW 5 */
+ 7404 "00000000" // /* MW 4 */
+ 7405 "00101000" // /* MW 3 */
+ 7406 "01000000" // /* MW 2 */
+ 7407 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00000001" // /* MW 3 */
+ 7410 "00101000" // /* MW 2 */
+ 7411 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7417 "00011100" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00000000" // /* MW 5 */
+ 7420 "00000100" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576
+ 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7425 "00000000" // /* MW 5 */
+ 7426 "00000000" // /* MW 4 */
+ 7427 "10101000" // /* MW 3 */
+ 7428 "00001100" // /* MW 2 */
+ 7429 "00000000" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7431 "11111110" // /* MW 5 */
+ 7432 "10111111" // /* MW 4 */
+ 7433 "11111000" // /* MW 3 */
+ 7434 "00000000" // /* MW 2 */
+ 7435 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7437 "00100000" // /* MW 3 */
+ 7438 "00000000" // /* MW 2 */
+ 7439 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7445 "10000001" // /* MW 11 */
+ 7446 "10101101" // /* MW 10 */
+ 7447 "00000000" // /* MW 9 */
+ 7448 "00000000" // /* MW 8 */
+ 7449 "00000000" // /* MW 7 */
+ 7450 "00000000" // /* MW 6 */
+ 7451 "00100000" // /* MW 5 */
+ 7452 "00000000" // /* MW 4 */
+ 7453 "11110000" // /* MW 3 */
+ 7454 "00101100" // /* MW 2 */
+ 7455 "00000000" // /* MW 1 */
+.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E
+.src_ref 6 "me_vmult_float_emulated.h" 108 6
+.src_ref 6 "me_vmult_float_emulated.h" 108 19
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 110 6
+.src_ref 6 "me_vmult_float_emulated.h" 110 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 6
+.src_ref 6 "me_vmult_float_emulated.h" 111 19
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 112 6
+.src_ref 6 "me_vmult_float_emulated.h" 112 19
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 114 6
+.src_ref 6 "me_vmult_float_emulated.h" 114 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 6
+.src_ref 6 "me_vmult_float_emulated.h" 115 19
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+ 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */
+ 7457 "00100000" // /* MW 9 */
+ 7458 "00000000" // /* MW 8 */
+ 7459 "00000000" // /* MW 7 */
+ 7460 "10111000" // /* MW 6 */
+ 7461 "00000010" // /* MW 5 */
+ 7462 "00000000" // /* MW 4 */
+ 7463 "01110000" // /* MW 3 */
+ 7464 "00000111" // /* MW 2 */
+ 7465 "11111110" // /* MW 1 */
+.src_ref 6 "me_vmult_float_emulated.h" 109 21
+.src_ref 6 "me_vmult_float_emulated.h" 111 34
+.src_ref 6 "me_vmult_float_emulated.h" 113 21
+.src_ref 6 "me_vmult_float_emulated.h" 115 34
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "add_reduce.hpp" 322 47
+.src_ref 5 "blend.hpp" 163 48
+.delay_slot
+ 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7467 "01011000" // /* MW 9 */
+ 7468 "00000000" // /* MW 8 */
+ 7469 "10001000" // /* MW 7 */
+ 7470 "10001010" // /* MW 6 */
+ 7471 "00000000" // /* MW 5 */
+ 7472 "00000000" // /* MW 4 */
+ 7473 "01110000" // /* MW 3 */
+ 7474 "10100111" // /* MW 2 */
+ 7475 "11111110" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "vector.hpp" 1139 17
+.src_ref 5 "vector.hpp" 1280 49
+.src_ref 5 "vector.hpp" 1287 41
+.src_ref 5 "vector.hpp" 1288 16
+.src_ref 5 "vector.hpp" 1292 26
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 226 22
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7477 "10010000" // /* MW 9 */
+ 7478 "11111111" // /* MW 8 */
+ 7479 "00001111" // /* MW 7 */
+ 7480 "00111110" // /* MW 6 */
+ 7481 "00000000" // /* MW 5 */
+ 7482 "00000000" // /* MW 4 */
+ 7483 "00100000" // /* MW 3 */
+ 7484 "10010011" // /* MW 2 */
+ 7485 "11111111" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 5 "blend.hpp" 170 36
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7487 "10000001" // /* MW 5 */
+ 7488 "00000000" // /* MW 4 */
+ 7489 "00101000" // /* MW 3 */
+ 7490 "01000000" // /* MW 2 */
+ 7491 "00000101" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7493 "00000101" // /* MW 3 */
+ 7494 "00100010" // /* MW 2 */
+ 7495 "00010001" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7497 "00011100" // /* MW 7 */
+ 7498 "00000000" // /* MW 6 */
+ 7499 "00000000" // /* MW 5 */
+ 7500 "00000100" // /* MW 4 */
+ 7501 "01110000" // /* MW 3 */
+ 7502 "00001111" // /* MW 2 */
+ 7503 "11111111" // /* MW 1 */
+.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656
+ 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */
+ 7505 "00000000" // /* MW 5 */
+ 7506 "00000000" // /* MW 4 */
+ 7507 "10101000" // /* MW 3 */
+ 7508 "00001100" // /* MW 2 */
+ 7509 "00000000" // /* MW 1 */
+.delay_slot
+ 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7511 "11100000" // /* MW 3 */
+ 7512 "11000001" // /* MW 2 */
+ 7513 "00011100" // /* MW 1 */
+.src_ref 5 "blend.hpp" 170 36
+.delay_slot
+ 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7515 "11111110" // /* MW 5 */
+ 7516 "10111111" // /* MW 4 */
+ 7517 "11111000" // /* MW 3 */
+ 7518 "00000000" // /* MW 2 */
+ 7519 "00000000" // /* MW 1 */
+.src_ref 5 "vector.hpp" 57 98
+.src_ref 3 "reduce_mean_c8_impl.h" 268 19
+.delay_slot
+ 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7521 "00100000" // /* MW 3 */
+ 7522 "00000000" // /* MW 2 */
+ 7523 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end
+.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0
+ 7527 "00000000" // /* MW 1 */
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 472
+.src_ref 8 "superkernels.cpp" 472 first
+.function_start
+ 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7537 "00000001" // /* MW 5 */
+ 7538 "00000000" // /* MW 4 */
+ 7539 "00000000" // /* MW 3 */
+ 7540 "00010000" // /* MW 2 */
+ 7541 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7543 "00010001" // /* MW 9 */
+ 7544 "01100000" // /* MW 8 */
+ 7545 "10110010" // /* MW 7 */
+ 7546 "11110011" // /* MW 6 */
+ 7547 "00000001" // /* MW 5 */
+ 7548 "00000000" // /* MW 4 */
+ 7549 "10110000" // /* MW 3 */
+ 7550 "11110011" // /* MW 2 */
+ 7551 "11111101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7553 "01110010" // /* MW 9 */
+ 7554 "01110000" // /* MW 8 */
+ 7555 "00001101" // /* MW 7 */
+ 7556 "10000010" // /* MW 6 */
+ 7557 "00011101" // /* MW 5 */
+ 7558 "11100111" // /* MW 4 */
+ 7559 "11010111" // /* MW 3 */
+ 7560 "11000010" // /* MW 2 */
+ 7561 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 22 first
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7563 "01111001" // /* MW 9 */
+ 7564 "11110000" // /* MW 8 */
+ 7565 "01101000" // /* MW 7 */
+ 7566 "10000001" // /* MW 6 */
+ 7567 "00000100" // /* MW 5 */
+ 7568 "00100001" // /* MW 4 */
+ 7569 "10110000" // /* MW 3 */
+ 7570 "00101110" // /* MW 2 */
+ 7571 "11111111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 30
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7573 "11110110" // /* MW 5 */
+ 7574 "01000111" // /* MW 4 */
+ 7575 "10111000" // /* MW 3 */
+ 7576 "00111110" // /* MW 2 */
+ 7577 "11111110" // /* MW 1 */
+ 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7579 "10110101" // /* MW 3 */
+ 7580 "11101001" // /* MW 2 */
+ 7581 "00001111" // /* MW 1 */
+ 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7583 "00000000" // /* MW 1 */
+ 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7585 "00000000" // /* MW 1 */
+ 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7587 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 477 6 first
+.src_ref 8 "superkernels.cpp" 477 16 first
+ 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */
+ 7589 "00000001" // /* MW 5 */
+ 7590 "01000000" // /* MW 4 */
+ 7591 "11110000" // /* MW 3 */
+ 7592 "00001111" // /* MW 2 */
+ 7593 "10000000" // /* MW 1 */
+.delay_slot
+ 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7595 "10010101" // /* MW 3 */
+ 7596 "11111101" // /* MW 2 */
+ 7597 "00001111" // /* MW 1 */
+.delay_slot
+ 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7599 "11010101" // /* MW 3 */
+ 7600 "11110101" // /* MW 2 */
+ 7601 "00001111" // /* MW 1 */
+.delay_slot
+ 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7603 "00011101" // /* MW 3 */
+ 7604 "11100000" // /* MW 2 */
+ 7605 "00001111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11
+.delay_slot
+ 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7607 "10010000" // /* MW 5 */
+ 7608 "11001001" // /* MW 4 */
+ 7609 "11001100" // /* MW 3 */
+ 7610 "00000111" // /* MW 2 */
+ 7611 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 474 11 first
+.delay_slot
+ 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7613 "00110001" // /* MW 3 */
+ 7614 "00000110" // /* MW 2 */
+ 7615 "00001110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 5 "tile.hpp" 74 8
+.src_ref 5 "tile.hpp" 74 8
+ 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7617 "00010000" // /* MW 11 */
+ 7618 "01110110" // /* MW 10 */
+ 7619 "00110010" // /* MW 9 */
+ 7620 "11110001" // /* MW 8 */
+ 7621 "00000001" // /* MW 7 */
+ 7622 "00000000" // /* MW 6 */
+ 7623 "10001011" // /* MW 5 */
+ 7624 "10001000" // /* MW 4 */
+ 7625 "00000111" // /* MW 3 */
+ 7626 "00110001" // /* MW 2 */
+ 7627 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 74 8 first
+.src_ref 5 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7629 "00010001" // /* MW 9 */
+ 7630 "01111000" // /* MW 8 */
+ 7631 "00110010" // /* MW 7 */
+ 7632 "11110001" // /* MW 6 */
+ 7633 "00000001" // /* MW 5 */
+ 7634 "00000000" // /* MW 4 */
+ 7635 "00110000" // /* MW 3 */
+ 7636 "11000110" // /* MW 2 */
+ 7637 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+.src_ref 5 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7639 "10000001" // /* MW 5 */
+ 7640 "11000101" // /* MW 4 */
+ 7641 "11101100" // /* MW 3 */
+ 7642 "11000000" // /* MW 2 */
+ 7643 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */
+ 7645 "00000001" // /* MW 5 */
+ 7646 "00000000" // /* MW 4 */
+ 7647 "00001000" // /* MW 3 */
+ 7648 "00000101" // /* MW 2 */
+ 7649 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 480 4
+.delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7651 "10000000" // /* MW 5 */
+ 7652 "11001000" // /* MW 4 */
+ 7653 "11000000" // /* MW 3 */
+ 7654 "00000111" // /* MW 2 */
+ 7655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7659 "00000000" // /* MW 1 */
+.src_ref 5 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7661 "00110001" // /* MW 3 */
+ 7662 "00100000" // /* MW 2 */
+ 7663 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7665 "00000000" // /* MW 15 */
+ 7666 "00000000" // /* MW 14 */
+ 7667 "01111000" // /* MW 13 */
+ 7668 "10100101" // /* MW 12 */
+ 7669 "00000001" // /* MW 11 */
+ 7670 "00000000" // /* MW 10 */
+ 7671 "00000000" // /* MW 9 */
+ 7672 "00000000" // /* MW 8 */
+ 7673 "01011011" // /* MW 7 */
+ 7674 "00000001" // /* MW 6 */
+ 7675 "00100000" // /* MW 5 */
+ 7676 "00000000" // /* MW 4 */
+ 7677 "11110000" // /* MW 3 */
+ 7678 "00101100" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 51
+.src_ref 8 "superkernels.cpp" 487 47
+.return_address
+ 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7681 "00010000" // /* MW 9 */
+ 7682 "00100000" // /* MW 8 */
+ 7683 "00110010" // /* MW 7 */
+ 7684 "11110001" // /* MW 6 */
+ 7685 "00000001" // /* MW 5 */
+ 7686 "00000000" // /* MW 4 */
+ 7687 "00000000" // /* MW 3 */
+ 7688 "00010001" // /* MW 2 */
+ 7689 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 51 first
+ 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7691 "00010000" // /* MW 9 */
+ 7692 "01100100" // /* MW 8 */
+ 7693 "00110010" // /* MW 7 */
+ 7694 "11110001" // /* MW 6 */
+ 7695 "00000001" // /* MW 5 */
+ 7696 "00000000" // /* MW 4 */
+ 7697 "11010000" // /* MW 3 */
+ 7698 "10111010" // /* MW 2 */
+ 7699 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 18
+.src_ref 8 "superkernels.cpp" 481 85
+ 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7701 "00010000" // /* MW 9 */
+ 7702 "00100010" // /* MW 8 */
+ 7703 "00110010" // /* MW 7 */
+ 7704 "11110001" // /* MW 6 */
+ 7705 "00000001" // /* MW 5 */
+ 7706 "00000000" // /* MW 4 */
+ 7707 "11010000" // /* MW 3 */
+ 7708 "11001010" // /* MW 2 */
+ 7709 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 85
+.src_ref 8 "superkernels.cpp" 482 16
+ 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7711 "00010000" // /* MW 9 */
+ 7712 "01101000" // /* MW 8 */
+ 7713 "10110010" // /* MW 7 */
+ 7714 "11110001" // /* MW 6 */
+ 7715 "00000001" // /* MW 5 */
+ 7716 "00000000" // /* MW 4 */
+ 7717 "11010000" // /* MW 3 */
+ 7718 "10110110" // /* MW 2 */
+ 7719 "01000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+.src_ref 8 "superkernels.cpp" 482 40 first
+ 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7721 "00010000" // /* MW 9 */
+ 7722 "01100110" // /* MW 8 */
+ 7723 "10110010" // /* MW 7 */
+ 7724 "11110000" // /* MW 6 */
+ 7725 "00000001" // /* MW 5 */
+ 7726 "00000000" // /* MW 4 */
+ 7727 "11010000" // /* MW 3 */
+ 7728 "10000101" // /* MW 2 */
+ 7729 "01000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 120 first
+.src_ref 8 "superkernels.cpp" 483 44
+ 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7731 "10000001" // /* MW 5 */
+ 7732 "00111001" // /* MW 4 */
+ 7733 "11011000" // /* MW 3 */
+ 7734 "10111110" // /* MW 2 */
+ 7735 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 44
+ 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7737 "00010100" // /* MW 3 */
+ 7738 "01101000" // /* MW 2 */
+ 7739 "00011010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7741 "00000000" // /* MW 5 */
+ 7742 "11001010" // /* MW 4 */
+ 7743 "11001100" // /* MW 3 */
+ 7744 "00000111" // /* MW 2 */
+ 7745 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13
+ 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7747 "11010000" // /* MW 5 */
+ 7748 "11001001" // /* MW 4 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "00000111" // /* MW 2 */
+ 7751 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 27
+ 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7753 "00101111" // /* MW 3 */
+ 7754 "10100101" // /* MW 2 */
+ 7755 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7757 "00000000" // /* MW 5 */
+ 7758 "00100000" // /* MW 4 */
+ 7759 "00001000" // /* MW 3 */
+ 7760 "00000000" // /* MW 2 */
+ 7761 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 61
+.src_ref 8 "superkernels.cpp" 482 16 first
+ 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7763 "01011111" // /* MW 5 */
+ 7764 "11001010" // /* MW 4 */
+ 7765 "00110110" // /* MW 3 */
+ 7766 "10000101" // /* MW 2 */
+ 7767 "01100000" // /* MW 1 */
+ 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7769 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 96 first
+ 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7771 "00101111" // /* MW 3 */
+ 7772 "11100101" // /* MW 2 */
+ 7773 "00010011" // /* MW 1 */
+ 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7775 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 481 16
+ 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7777 "01010001" // /* MW 3 */
+ 7778 "00000110" // /* MW 2 */
+ 7779 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 15 first
+ 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7781 "00101110" // /* MW 3 */
+ 7782 "01001100" // /* MW 2 */
+ 7783 "00000010" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7785 "00110001" // /* MW 3 */
+ 7786 "00011110" // /* MW 2 */
+ 7787 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7789 "00110001" // /* MW 3 */
+ 7790 "00011110" // /* MW 2 */
+ 7791 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7793 "00110001" // /* MW 3 */
+ 7794 "00011110" // /* MW 2 */
+ 7795 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7797 "00110001" // /* MW 3 */
+ 7798 "00011110" // /* MW 2 */
+ 7799 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7801 "00110001" // /* MW 3 */
+ 7802 "00011110" // /* MW 2 */
+ 7803 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7805 "00110001" // /* MW 3 */
+ 7806 "00011110" // /* MW 2 */
+ 7807 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 483 13 first
+ 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7809 "00101001" // /* MW 3 */
+ 7810 "00000100" // /* MW 2 */
+ 7811 "00001000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47 first
+ 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7813 "00110001" // /* MW 3 */
+ 7814 "00011110" // /* MW 2 */
+ 7815 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7817 "00110001" // /* MW 3 */
+ 7818 "00011110" // /* MW 2 */
+ 7819 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 487 47
+ 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7821 "00110001" // /* MW 3 */
+ 7822 "00011110" // /* MW 2 */
+ 7823 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40 first
+ 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7825 "00110110" // /* MW 3 */
+ 7826 "11011100" // /* MW 2 */
+ 7827 "00000010" // /* MW 1 */
+ 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7829 "00000000" // /* MW 1 */
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+ 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7837 "00000000" // /* MW 1 */
+ 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7839 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7841 "00001011" // /* MW 3 */
+ 7842 "01100011" // /* MW 2 */
+ 7843 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+ 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */
+ 7845 "00000001" // /* MW 5 */
+ 7846 "01000000" // /* MW 4 */
+ 7847 "01111000" // /* MW 3 */
+ 7848 "00001111" // /* MW 2 */
+ 7849 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7851 "11000000" // /* MW 3 */
+ 7852 "00011110" // /* MW 2 */
+ 7853 "00011011" // /* MW 1 */
+.delay_slot
+ 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7855 "00011101" // /* MW 3 */
+ 7856 "11011001" // /* MW 2 */
+ 7857 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7863 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7865 "00000001" // /* MW 5 */
+ 7866 "00000000" // /* MW 4 */
+ 7867 "01010000" // /* MW 3 */
+ 7868 "00010101" // /* MW 2 */
+ 7869 "00000000" // /* MW 1 */
+.delay_slot
+ 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7871 "10010101" // /* MW 3 */
+ 7872 "11011101" // /* MW 2 */
+ 7873 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7881 "00011100" // /* MW 7 */
+ 7882 "00000000" // /* MW 6 */
+ 7883 "00000000" // /* MW 5 */
+ 7884 "00000100" // /* MW 4 */
+ 7885 "11110000" // /* MW 3 */
+ 7886 "00101100" // /* MW 2 */
+ 7887 "00000000" // /* MW 1 */
+.return_address
+ 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */
+ 7889 "00000000" // /* MW 5 */
+ 7890 "00000000" // /* MW 4 */
+ 7891 "10011000" // /* MW 3 */
+ 7892 "00001111" // /* MW 2 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7895 "11100000" // /* MW 5 */
+ 7896 "11001001" // /* MW 4 */
+ 7897 "11001110" // /* MW 3 */
+ 7898 "00000111" // /* MW 2 */
+ 7899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7905 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7907 "00011100" // /* MW 13 */
+ 7908 "00000000" // /* MW 12 */
+ 7909 "00000000" // /* MW 11 */
+ 7910 "01010111" // /* MW 10 */
+ 7911 "00011010" // /* MW 9 */
+ 7912 "01000000" // /* MW 8 */
+ 7913 "00000000" // /* MW 7 */
+ 7914 "00000000" // /* MW 6 */
+ 7915 "10110110" // /* MW 5 */
+ 7916 "00000010" // /* MW 4 */
+ 7917 "11110000" // /* MW 3 */
+ 7918 "00101100" // /* MW 2 */
+ 7919 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384
+.src_ref 8 "superkernels.cpp" 491 40
+.no_stack_arguments
+ 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */
+ 7921 "00000001" // /* MW 5 */
+ 7922 "00000000" // /* MW 4 */
+ 7923 "01010000" // /* MW 3 */
+ 7924 "00010101" // /* MW 2 */
+ 7925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7933 "01100111" // /* MW 3 */
+ 7934 "00000001" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7937 "00000000" // /* MW 15 */
+ 7938 "00000000" // /* MW 14 */
+ 7939 "01111000" // /* MW 13 */
+ 7940 "10100101" // /* MW 12 */
+ 7941 "00000001" // /* MW 11 */
+ 7942 "00001100" // /* MW 10 */
+ 7943 "00011000" // /* MW 9 */
+ 7944 "00000010" // /* MW 8 */
+ 7945 "01011011" // /* MW 7 */
+ 7946 "00000001" // /* MW 6 */
+ 7947 "00100000" // /* MW 5 */
+ 7948 "00000000" // /* MW 4 */
+ 7949 "11110000" // /* MW 3 */
+ 7950 "00101100" // /* MW 2 */
+ 7951 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+.no_stack_arguments
+ 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */
+ 7953 "00000001" // /* MW 5 */
+ 7954 "00000000" // /* MW 4 */
+ 7955 "01000000" // /* MW 3 */
+ 7956 "00011000" // /* MW 2 */
+ 7957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7959 "00100000" // /* MW 3 */
+ 7960 "01010000" // /* MW 2 */
+ 7961 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7963 "11100000" // /* MW 5 */
+ 7964 "11001001" // /* MW 4 */
+ 7965 "11001110" // /* MW 3 */
+ 7966 "00000111" // /* MW 2 */
+ 7967 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.delay_slot
+ 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7969 "00000000" // /* MW 5 */
+ 7970 "00100000" // /* MW 4 */
+ 7971 "00000001" // /* MW 3 */
+ 7972 "00000000" // /* MW 2 */
+ 7973 "01001111" // /* MW 1 */
+.delay_slot
+ 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "10010101" // /* MW 3 */
+ 7976 "11011101" // /* MW 2 */
+ 7977 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7979 "00100000" // /* MW 5 */
+ 7980 "00000000" // /* MW 4 */
+ 7981 "11110000" // /* MW 3 */
+ 7982 "00101100" // /* MW 2 */
+ 7983 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 491 40
+.return_address
+ 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7985 "10111000" // /* MW 9 */
+ 7986 "00001000" // /* MW 8 */
+ 7987 "00000000" // /* MW 7 */
+ 7988 "00000000" // /* MW 6 */
+ 7989 "11010010" // /* MW 5 */
+ 7990 "00000010" // /* MW 4 */
+ 7991 "01010000" // /* MW 3 */
+ 7992 "11000000" // /* MW 2 */
+ 7993 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.src_ref 8 "superkernels.cpp" 492 38
+.src_ref 8 "superkernels.cpp" 492 38
+ 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7995 "01111000" // /* MW 9 */
+ 7996 "01001001" // /* MW 8 */
+ 7997 "00000000" // /* MW 7 */
+ 7998 "00001000" // /* MW 6 */
+ 7999 "10000000" // /* MW 5 */
+ 8000 "00000001" // /* MW 4 */
+ 8001 "10000000" // /* MW 3 */
+ 8002 "01000000" // /* MW 2 */
+ 8003 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+.src_ref 8 "superkernels.cpp" 498 15
+ 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8005 "00010000" // /* MW 9 */
+ 8006 "01101010" // /* MW 8 */
+ 8007 "10110010" // /* MW 7 */
+ 8008 "11110001" // /* MW 6 */
+ 8009 "00000001" // /* MW 5 */
+ 8010 "00000000" // /* MW 4 */
+ 8011 "00100000" // /* MW 3 */
+ 8012 "00100011" // /* MW 2 */
+ 8013 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8015 "10110000" // /* MW 5 */
+ 8016 "11001001" // /* MW 4 */
+ 8017 "11000010" // /* MW 3 */
+ 8018 "00000111" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8021 "10100000" // /* MW 5 */
+ 8022 "11001001" // /* MW 4 */
+ 8023 "11001110" // /* MW 3 */
+ 8024 "00000111" // /* MW 2 */
+ 8025 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8027 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 38
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8029 "00010111" // /* MW 3 */
+ 8030 "00011110" // /* MW 2 */
+ 8031 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8033 "10000000" // /* MW 3 */
+ 8034 "00111010" // /* MW 2 */
+ 8035 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8037 "00010110" // /* MW 3 */
+ 8038 "01000000" // /* MW 2 */
+ 8039 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8041 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 491 40
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8043 "00000001" // /* MW 3 */
+ 8044 "00000001" // /* MW 2 */
+ 8045 "00011100" // /* MW 1 */
+ 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8047 "00000000" // /* MW 1 */
+ 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8049 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 492 38 first
+ 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8051 "00000111" // /* MW 3 */
+ 8052 "00001011" // /* MW 2 */
+ 8053 "00000110" // /* MW 1 */
+ 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8055 "00000000" // /* MW 1 */
+ 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8057 "00000000" // /* MW 1 */
+ 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8059 "00000000" // /* MW 1 */
+ 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8061 "00000000" // /* MW 1 */
+ 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8063 "00000000" // /* MW 1 */
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 494 25 first
+ 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8067 "11010001" // /* MW 3 */
+ 8068 "00011101" // /* MW 2 */
+ 8069 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 495 24 first
+ 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8071 "11110001" // /* MW 3 */
+ 8072 "00000101" // /* MW 2 */
+ 8073 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 496 24 first
+ 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8075 "10110001" // /* MW 3 */
+ 8076 "00010101" // /* MW 2 */
+ 8077 "00001110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 15 first
+ 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8079 "00101110" // /* MW 3 */
+ 8080 "00011100" // /* MW 2 */
+ 8081 "00000010" // /* MW 1 */
+ 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8083 "00000000" // /* MW 1 */
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+ 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8087 "00000000" // /* MW 1 */
+ 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8089 "00000000" // /* MW 1 */
+ 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8091 "00000000" // /* MW 1 */
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 498 13
+ 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "00101001" // /* MW 3 */
+ 8096 "00000100" // /* MW 2 */
+ 8097 "00001011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 16 first
+ 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8099 "00101110" // /* MW 3 */
+ 8100 "00000100" // /* MW 2 */
+ 8101 "00000010" // /* MW 1 */
+ 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8103 "00000000" // /* MW 1 */
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8105 "00000000" // /* MW 1 */
+ 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8107 "00000000" // /* MW 1 */
+ 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8109 "00000000" // /* MW 1 */
+ 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8111 "00000000" // /* MW 1 */
+ 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8113 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 499 14
+ 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8115 "00101001" // /* MW 3 */
+ 8116 "00000100" // /* MW 2 */
+ 8117 "00001001" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 15 first
+ 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8119 "00101110" // /* MW 3 */
+ 8120 "00010100" // /* MW 2 */
+ 8121 "00000010" // /* MW 1 */
+ 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8123 "00000000" // /* MW 1 */
+ 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */
+ 8125 "00000000" // /* MW 5 */
+ 8126 "00000000" // /* MW 4 */
+ 8127 "11111000" // /* MW 3 */
+ 8128 "00001111" // /* MW 2 */
+ 8129 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8131 "10111000" // /* MW 5 */
+ 8132 "11001001" // /* MW 4 */
+ 8133 "11000000" // /* MW 3 */
+ 8134 "00000111" // /* MW 2 */
+ 8135 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8141 "01100111" // /* MW 3 */
+ 8142 "00000001" // /* MW 2 */
+ 8143 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 500 13
+.delay_slot
+ 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8145 "00000000" // /* MW 15 */
+ 8146 "00000000" // /* MW 14 */
+ 8147 "01111000" // /* MW 13 */
+ 8148 "10100101" // /* MW 12 */
+ 8149 "00000001" // /* MW 11 */
+ 8150 "00000000" // /* MW 10 */
+ 8151 "00000000" // /* MW 9 */
+ 8152 "10000000" // /* MW 8 */
+ 8153 "00101001" // /* MW 7 */
+ 8154 "00000100" // /* MW 6 */
+ 8155 "00100000" // /* MW 5 */
+ 8156 "00000000" // /* MW 4 */
+ 8157 "11110000" // /* MW 3 */
+ 8158 "00101100" // /* MW 2 */
+ 8159 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624
+.src_ref 8 "superkernels.cpp" 505 7
+.src_ref 8 "superkernels.cpp" 508 7
+.src_ref 8 "superkernels.cpp" 511 7
+ 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8161 "00010001" // /* MW 9 */
+ 8162 "01101000" // /* MW 8 */
+ 8163 "10110010" // /* MW 7 */
+ 8164 "11110011" // /* MW 6 */
+ 8165 "00000001" // /* MW 5 */
+ 8166 "00000000" // /* MW 4 */
+ 8167 "10110000" // /* MW 3 */
+ 8168 "10100011" // /* MW 2 */
+ 8169 "11111011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8171 "10000001" // /* MW 5 */
+ 8172 "00101001" // /* MW 4 */
+ 8173 "11110110" // /* MW 3 */
+ 8174 "00101100" // /* MW 2 */
+ 8175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640
+.src_ref 8 "superkernels.cpp" 505 7 first
+.src_ref 8 "superkernels.cpp" 505 19
+ 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8177 "00001010" // /* MW 5 */
+ 8178 "01000100" // /* MW 4 */
+ 8179 "11010000" // /* MW 3 */
+ 8180 "11000010" // /* MW 2 */
+ 8181 "11100000" // /* MW 1 */
+ 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8183 "00000000" // /* MW 1 */
+ 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8185 "00000000" // /* MW 1 */
+ 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8187 "00000000" // /* MW 1 */
+ 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8189 "00000000" // /* MW 1 */
+ 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8191 "00000000" // /* MW 1 */
+ 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8193 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 19
+ 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8195 "00001000" // /* MW 3 */
+ 8196 "01100011" // /* MW 2 */
+ 8197 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 25
+ 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */
+ 8199 "00000001" // /* MW 5 */
+ 8200 "01000000" // /* MW 4 */
+ 8201 "01011000" // /* MW 3 */
+ 8202 "00010000" // /* MW 2 */
+ 8203 "10001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.delay_slot
+ 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8205 "00000110" // /* MW 3 */
+ 8206 "01100110" // /* MW 2 */
+ 8207 "00011110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8209 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8211 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8213 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8215 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29
+ 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8217 "10001000" // /* MW 5 */
+ 8218 "11001001" // /* MW 4 */
+ 8219 "11000100" // /* MW 3 */
+ 8220 "00000111" // /* MW 2 */
+ 8221 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 29 first
+.src_ref 8 "superkernels.cpp" 505 65
+ 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8223 "00010000" // /* MW 9 */
+ 8224 "00110000" // /* MW 8 */
+ 8225 "00110010" // /* MW 7 */
+ 8226 "11110001" // /* MW 6 */
+ 8227 "00000001" // /* MW 5 */
+ 8228 "00000000" // /* MW 4 */
+ 8229 "11010000" // /* MW 3 */
+ 8230 "11000010" // /* MW 2 */
+ 8231 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 65
+ 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8233 "00111010" // /* MW 3 */
+ 8234 "00000100" // /* MW 2 */
+ 8235 "00000010" // /* MW 1 */
+ 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8237 "00000000" // /* MW 1 */
+ 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8239 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.no_stack_arguments
+ 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8241 "00000001" // /* MW 5 */
+ 8242 "00000000" // /* MW 4 */
+ 8243 "11111000" // /* MW 3 */
+ 8244 "00010011" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8247 "00000001" // /* MW 3 */
+ 8248 "00011010" // /* MW 2 */
+ 8249 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8251 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8253 "11011010" // /* MW 3 */
+ 8254 "00110110" // /* MW 2 */
+ 8255 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8257 "01000001" // /* MW 5 */
+ 8258 "10111011" // /* MW 4 */
+ 8259 "00110111" // /* MW 3 */
+ 8260 "01100000" // /* MW 2 */
+ 8261 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.delay_slot
+ 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8263 "00010010" // /* MW 9 */
+ 8264 "00000001" // /* MW 8 */
+ 8265 "00000100" // /* MW 7 */
+ 8266 "00000000" // /* MW 6 */
+ 8267 "01011011" // /* MW 5 */
+ 8268 "00000001" // /* MW 4 */
+ 8269 "11110000" // /* MW 3 */
+ 8270 "00101100" // /* MW 2 */
+ 8271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+.src_ref 8 "superkernels.cpp" 505 41
+.return_address
+ 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8273 "01000001" // /* MW 5 */
+ 8274 "10101111" // /* MW 4 */
+ 8275 "00111101" // /* MW 3 */
+ 8276 "00000110" // /* MW 2 */
+ 8277 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 41
+ 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8279 "00000010" // /* MW 3 */
+ 8280 "11100001" // /* MW 2 */
+ 8281 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 505 6
+.src_ref 8 "superkernels.cpp" 505 76
+ 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */
+ 8283 "00000001" // /* MW 5 */
+ 8284 "01000000" // /* MW 4 */
+ 8285 "01010000" // /* MW 3 */
+ 8286 "00010000" // /* MW 2 */
+ 8287 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8299 "10000001" // /* MW 5 */
+ 8300 "11011001" // /* MW 4 */
+ 8301 "10100100" // /* MW 3 */
+ 8302 "00011111" // /* MW 2 */
+ 8303 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8305 "01110110" // /* MW 3 */
+ 8306 "11111111" // /* MW 2 */
+ 8307 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8309 "00110110" // /* MW 3 */
+ 8310 "11111110" // /* MW 2 */
+ 8311 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8313 "01010110" // /* MW 3 */
+ 8314 "11111110" // /* MW 2 */
+ 8315 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8317 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8319 "00110110" // /* MW 3 */
+ 8320 "01000110" // /* MW 2 */
+ 8321 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8325 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8327 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8329 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8331 "00010010" // /* MW 3 */
+ 8332 "10100011" // /* MW 2 */
+ 8333 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8335 "00110001" // /* MW 3 */
+ 8336 "00000110" // /* MW 2 */
+ 8337 "00001010" // /* MW 1 */
+ 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8339 "00000000" // /* MW 1 */
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+ 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8345 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8347 "00010000" // /* MW 5 */
+ 8348 "10100110" // /* MW 4 */
+ 8349 "11111000" // /* MW 3 */
+ 8350 "00101100" // /* MW 2 */
+ 8351 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816
+ 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8353 "00000000" // /* MW 1 */
+ 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8355 "00000000" // /* MW 1 */
+ 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8357 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 7 first
+ 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8359 "01111110" // /* MW 9 */
+ 8360 "10100101" // /* MW 8 */
+ 8361 "00000001" // /* MW 7 */
+ 8362 "00000000" // /* MW 6 */
+ 8363 "00010000" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11010000" // /* MW 3 */
+ 8366 "11000010" // /* MW 2 */
+ 8367 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832
+.src_ref 8 "superkernels.cpp" 508 19
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 8 "superkernels.cpp" 558 19
+ 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8369 "00001001" // /* MW 3 */
+ 8370 "00011100" // /* MW 2 */
+ 8371 "00010000" // /* MW 1 */
+ 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8373 "00000000" // /* MW 1 */
+ 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8375 "00000000" // /* MW 1 */
+ 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8377 "00000000" // /* MW 1 */
+ 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8379 "00000000" // /* MW 1 */
+ 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8381 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 19
+ 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8383 "00001000" // /* MW 3 */
+ 8384 "10100001" // /* MW 2 */
+ 8385 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 25
+ 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8387 "00000001" // /* MW 5 */
+ 8388 "01000000" // /* MW 4 */
+ 8389 "10110000" // /* MW 3 */
+ 8390 "00010000" // /* MW 2 */
+ 8391 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8401 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+ 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8403 "11000000" // /* MW 5 */
+ 8404 "11001001" // /* MW 4 */
+ 8405 "11000100" // /* MW 3 */
+ 8406 "00000111" // /* MW 2 */
+ 8407 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 29
+.src_ref 8 "superkernels.cpp" 508 65
+ 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8409 "00010000" // /* MW 9 */
+ 8410 "00110000" // /* MW 8 */
+ 8411 "00110010" // /* MW 7 */
+ 8412 "11110001" // /* MW 6 */
+ 8413 "00000001" // /* MW 5 */
+ 8414 "00000000" // /* MW 4 */
+ 8415 "11010000" // /* MW 3 */
+ 8416 "11000010" // /* MW 2 */
+ 8417 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 65
+ 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8419 "00111010" // /* MW 3 */
+ 8420 "00000100" // /* MW 2 */
+ 8421 "00000010" // /* MW 1 */
+ 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8423 "00000000" // /* MW 1 */
+ 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8425 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.no_stack_arguments
+ 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8427 "00000001" // /* MW 5 */
+ 8428 "00000000" // /* MW 4 */
+ 8429 "11111000" // /* MW 3 */
+ 8430 "00010011" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8433 "00000001" // /* MW 3 */
+ 8434 "00011010" // /* MW 2 */
+ 8435 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8437 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8439 "11011010" // /* MW 3 */
+ 8440 "00110110" // /* MW 2 */
+ 8441 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8443 "01000001" // /* MW 5 */
+ 8444 "10111011" // /* MW 4 */
+ 8445 "00110111" // /* MW 3 */
+ 8446 "01100000" // /* MW 2 */
+ 8447 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.delay_slot
+ 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "01111000" // /* MW 13 */
+ 8452 "10100101" // /* MW 12 */
+ 8453 "00000001" // /* MW 11 */
+ 8454 "10010000" // /* MW 10 */
+ 8455 "00001000" // /* MW 9 */
+ 8456 "00100000" // /* MW 8 */
+ 8457 "01011011" // /* MW 7 */
+ 8458 "00000001" // /* MW 6 */
+ 8459 "00100000" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+.src_ref 8 "superkernels.cpp" 508 41
+.return_address
+ 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8465 "01000001" // /* MW 5 */
+ 8466 "10101111" // /* MW 4 */
+ 8467 "00111101" // /* MW 3 */
+ 8468 "00000110" // /* MW 2 */
+ 8469 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 41
+ 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8471 "00000010" // /* MW 3 */
+ 8472 "11100001" // /* MW 2 */
+ 8473 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 508 6
+.src_ref 8 "superkernels.cpp" 508 76
+ 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */
+ 8475 "00000001" // /* MW 5 */
+ 8476 "01000000" // /* MW 4 */
+ 8477 "10110000" // /* MW 3 */
+ 8478 "00010000" // /* MW 2 */
+ 8479 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8487 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8489 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8491 "10000001" // /* MW 5 */
+ 8492 "11011001" // /* MW 4 */
+ 8493 "10100100" // /* MW 3 */
+ 8494 "00011111" // /* MW 2 */
+ 8495 "11111100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8497 "01110110" // /* MW 3 */
+ 8498 "11111111" // /* MW 2 */
+ 8499 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8501 "00110110" // /* MW 3 */
+ 8502 "11111110" // /* MW 2 */
+ 8503 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "01010110" // /* MW 3 */
+ 8506 "11111110" // /* MW 2 */
+ 8507 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8511 "00110110" // /* MW 3 */
+ 8512 "01000110" // /* MW 2 */
+ 8513 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8515 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00010010" // /* MW 3 */
+ 8524 "10100011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8527 "00110001" // /* MW 3 */
+ 8528 "00000110" // /* MW 2 */
+ 8529 "00001010" // /* MW 1 */
+ 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8531 "00000000" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+ 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8537 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8539 "00010000" // /* MW 5 */
+ 8540 "10100110" // /* MW 4 */
+ 8541 "11111000" // /* MW 3 */
+ 8542 "00101100" // /* MW 2 */
+ 8543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008
+ 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8545 "00000000" // /* MW 1 */
+ 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8547 "00000000" // /* MW 1 */
+ 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8549 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 7 first
+.src_ref 8 "superkernels.cpp" 511 29
+ 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8551 "00010000" // /* MW 9 */
+ 8552 "01110010" // /* MW 8 */
+ 8553 "10110010" // /* MW 7 */
+ 8554 "11110011" // /* MW 6 */
+ 8555 "00000001" // /* MW 5 */
+ 8556 "00000000" // /* MW 4 */
+ 8557 "11010000" // /* MW 3 */
+ 8558 "11000010" // /* MW 2 */
+ 8559 "11100000" // /* MW 1 */
+ 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8561 "00000000" // /* MW 1 */
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+ 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8565 "00000000" // /* MW 1 */
+ 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8567 "00000000" // /* MW 1 */
+ 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8569 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8571 "00010001" // /* MW 3 */
+ 8572 "00100100" // /* MW 2 */
+ 8573 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 19
+ 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8575 "00001000" // /* MW 3 */
+ 8576 "10100001" // /* MW 2 */
+ 8577 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 25
+ 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */
+ 8579 "00000001" // /* MW 5 */
+ 8580 "01000000" // /* MW 4 */
+ 8581 "00100000" // /* MW 3 */
+ 8582 "00010001" // /* MW 2 */
+ 8583 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+.delay_slot
+ 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8585 "11000000" // /* MW 5 */
+ 8586 "11001000" // /* MW 4 */
+ 8587 "11000100" // /* MW 3 */
+ 8588 "00000111" // /* MW 2 */
+ 8589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8591 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8593 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8595 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8597 "00000001" // /* MW 3 */
+ 8598 "00100010" // /* MW 2 */
+ 8599 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 29
+.src_ref 8 "superkernels.cpp" 511 42
+ 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8601 "00000010" // /* MW 5 */
+ 8602 "00110100" // /* MW 4 */
+ 8603 "11010000" // /* MW 3 */
+ 8604 "11000010" // /* MW 2 */
+ 8605 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 66
+ 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8607 "00111010" // /* MW 3 */
+ 8608 "00000100" // /* MW 2 */
+ 8609 "00000010" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.no_stack_arguments
+ 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 8615 "00000001" // /* MW 5 */
+ 8616 "00000000" // /* MW 4 */
+ 8617 "11111000" // /* MW 3 */
+ 8618 "00010011" // /* MW 2 */
+ 8619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8621 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8623 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8625 "00011010" // /* MW 3 */
+ 8626 "00110111" // /* MW 2 */
+ 8627 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8629 "01000001" // /* MW 5 */
+ 8630 "10111011" // /* MW 4 */
+ 8631 "00110111" // /* MW 3 */
+ 8632 "01100000" // /* MW 2 */
+ 8633 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.delay_slot
+ 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8635 "00100100" // /* MW 5 */
+ 8636 "00000010" // /* MW 4 */
+ 8637 "11111000" // /* MW 3 */
+ 8638 "00101100" // /* MW 2 */
+ 8639 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+.src_ref 8 "superkernels.cpp" 511 42
+.return_address
+ 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8641 "01000001" // /* MW 5 */
+ 8642 "10101111" // /* MW 4 */
+ 8643 "00111101" // /* MW 3 */
+ 8644 "00000110" // /* MW 2 */
+ 8645 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 42
+ 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00000010" // /* MW 3 */
+ 8648 "11100001" // /* MW 2 */
+ 8649 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 511 6
+.src_ref 8 "superkernels.cpp" 511 77
+ 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */
+ 8651 "00000001" // /* MW 5 */
+ 8652 "01000000" // /* MW 4 */
+ 8653 "00010000" // /* MW 3 */
+ 8654 "00010001" // /* MW 2 */
+ 8655 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8667 "01011000" // /* MW 9 */
+ 8668 "00000001" // /* MW 8 */
+ 8669 "00001000" // /* MW 7 */
+ 8670 "11101010" // /* MW 6 */
+ 8671 "00010111" // /* MW 5 */
+ 8672 "00111111" // /* MW 4 */
+ 8673 "11010000" // /* MW 3 */
+ 8674 "11101110" // /* MW 2 */
+ 8675 "11011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8677 "01010110" // /* MW 3 */
+ 8678 "11111110" // /* MW 2 */
+ 8679 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8681 "01110110" // /* MW 3 */
+ 8682 "11111110" // /* MW 2 */
+ 8683 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8685 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8687 "01010110" // /* MW 3 */
+ 8688 "01000110" // /* MW 2 */
+ 8689 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8691 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8693 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8695 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8699 "00100010" // /* MW 3 */
+ 8700 "11100101" // /* MW 2 */
+ 8701 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "01010001" // /* MW 3 */
+ 8704 "00000110" // /* MW 2 */
+ 8705 "00001110" // /* MW 1 */
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+ 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8709 "00000000" // /* MW 1 */
+ 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8711 "00000000" // /* MW 5 */
+ 8712 "00000000" // /* MW 4 */
+ 8713 "00101000" // /* MW 3 */
+ 8714 "00010001" // /* MW 2 */
+ 8715 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8717 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+.delay_slot
+ 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8719 "00011000" // /* MW 3 */
+ 8720 "10010011" // /* MW 2 */
+ 8721 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8727 "01111110" // /* MW 9 */
+ 8728 "10100101" // /* MW 8 */
+ 8729 "00000001" // /* MW 7 */
+ 8730 "00000000" // /* MW 6 */
+ 8731 "00010000" // /* MW 5 */
+ 8732 "00000000" // /* MW 4 */
+ 8733 "11110000" // /* MW 3 */
+ 8734 "00101100" // /* MW 2 */
+ 8735 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200
+ 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */
+ 8737 "00000000" // /* MW 5 */
+ 8738 "00000000" // /* MW 4 */
+ 8739 "00101000" // /* MW 3 */
+ 8740 "00010001" // /* MW 2 */
+ 8741 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8743 "00000101" // /* MW 3 */
+ 8744 "00100000" // /* MW 2 */
+ 8745 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8753 "00000000" // /* MW 15 */
+ 8754 "00000000" // /* MW 14 */
+ 8755 "01111000" // /* MW 13 */
+ 8756 "10100101" // /* MW 12 */
+ 8757 "00000001" // /* MW 11 */
+ 8758 "00000000" // /* MW 10 */
+ 8759 "00000000" // /* MW 9 */
+ 8760 "00000000" // /* MW 8 */
+ 8761 "01011011" // /* MW 7 */
+ 8762 "00000001" // /* MW 6 */
+ 8763 "00100000" // /* MW 5 */
+ 8764 "00000000" // /* MW 4 */
+ 8765 "11110000" // /* MW 3 */
+ 8766 "00101100" // /* MW 2 */
+ 8767 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232
+.src_ref 8 "superkernels.cpp" 516 45
+.src_ref 8 "superkernels.cpp" 522 6
+ 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8769 "00000000" // /* MW 15 */
+ 8770 "00000000" // /* MW 14 */
+ 8771 "01111000" // /* MW 13 */
+ 8772 "10100101" // /* MW 12 */
+ 8773 "00000001" // /* MW 11 */
+ 8774 "00101000" // /* MW 10 */
+ 8775 "00000000" // /* MW 9 */
+ 8776 "00000001" // /* MW 8 */
+ 8777 "01011011" // /* MW 7 */
+ 8778 "00000001" // /* MW 6 */
+ 8779 "00100000" // /* MW 5 */
+ 8780 "00000000" // /* MW 4 */
+ 8781 "11110000" // /* MW 3 */
+ 8782 "00101100" // /* MW 2 */
+ 8783 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248
+.src_ref 8 "superkernels.cpp" 516 47
+.src_ref 1 "io_buffer_main.h" 125 25
+ 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8785 "00010000" // /* MW 9 */
+ 8786 "01100110" // /* MW 8 */
+ 8787 "00110010" // /* MW 7 */
+ 8788 "11110011" // /* MW 6 */
+ 8789 "00000001" // /* MW 5 */
+ 8790 "00000000" // /* MW 4 */
+ 8791 "00100000" // /* MW 3 */
+ 8792 "01110011" // /* MW 2 */
+ 8793 "11111100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 47 first
+.src_ref 8 "superkernels.cpp" 522 6
+ 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8795 "00010000" // /* MW 9 */
+ 8796 "01101000" // /* MW 8 */
+ 8797 "00110010" // /* MW 7 */
+ 8798 "11110001" // /* MW 6 */
+ 8799 "00000001" // /* MW 5 */
+ 8800 "00000000" // /* MW 4 */
+ 8801 "11010000" // /* MW 3 */
+ 8802 "11010110" // /* MW 2 */
+ 8803 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8805 "00010000" // /* MW 9 */
+ 8806 "01100000" // /* MW 8 */
+ 8807 "00110010" // /* MW 7 */
+ 8808 "11110011" // /* MW 6 */
+ 8809 "00000001" // /* MW 5 */
+ 8810 "00000000" // /* MW 4 */
+ 8811 "11010000" // /* MW 3 */
+ 8812 "11000110" // /* MW 2 */
+ 8813 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+ 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "10010110" // /* MW 3 */
+ 8816 "00000110" // /* MW 2 */
+ 8817 "00000110" // /* MW 1 */
+ 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8819 "00000000" // /* MW 1 */
+ 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8821 "00000000" // /* MW 1 */
+ 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8823 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8825 "01110110" // /* MW 3 */
+ 8826 "00000110" // /* MW 2 */
+ 8827 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+ 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8829 "00001101" // /* MW 3 */
+ 8830 "01101011" // /* MW 2 */
+ 8831 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8833 "00000111" // /* MW 3 */
+ 8834 "01100001" // /* MW 2 */
+ 8835 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */
+ 8837 "00000001" // /* MW 5 */
+ 8838 "01000000" // /* MW 4 */
+ 8839 "00001000" // /* MW 3 */
+ 8840 "00010010" // /* MW 2 */
+ 8841 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2 first
+.delay_slot
+ 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8843 "00000111" // /* MW 3 */
+ 8844 "00101000" // /* MW 2 */
+ 8845 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 514 2
+.delay_slot
+ 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8847 "10010001" // /* MW 3 */
+ 8848 "00000110" // /* MW 2 */
+ 8849 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8851 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 45 first
+.delay_slot
+ 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8853 "11010101" // /* MW 3 */
+ 8854 "01101001" // /* MW 2 */
+ 8855 "00011000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 516 12
+.src_ref 8 "superkernels.cpp" 522 6
+.delay_slot
+ 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8857 "00100010" // /* MW 5 */
+ 8858 "01001000" // /* MW 4 */
+ 8859 "10110000" // /* MW 3 */
+ 8860 "10000011" // /* MW 2 */
+ 8861 "11110111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6 first
+ 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8863 "00100111" // /* MW 3 */
+ 8864 "01100001" // /* MW 2 */
+ 8865 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */
+ 8867 "00000001" // /* MW 5 */
+ 8868 "01000000" // /* MW 4 */
+ 8869 "11000000" // /* MW 3 */
+ 8870 "00010001" // /* MW 2 */
+ 8871 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8873 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8875 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8877 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8879 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8881 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8883 "11101000" // /* MW 3 */
+ 8884 "01100000" // /* MW 2 */
+ 8885 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 522 6
+ 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */
+ 8887 "00000001" // /* MW 5 */
+ 8888 "01000000" // /* MW 4 */
+ 8889 "10101000" // /* MW 3 */
+ 8890 "00010001" // /* MW 2 */
+ 8891 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26
+.delay_slot
+ 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8893 "11000000" // /* MW 5 */
+ 8894 "11001001" // /* MW 4 */
+ 8895 "11001100" // /* MW 3 */
+ 8896 "00000111" // /* MW 2 */
+ 8897 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8901 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8903 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 26 first
+.src_ref 8 "superkernels.cpp" 523 61
+ 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8907 "00010000" // /* MW 9 */
+ 8908 "00100100" // /* MW 8 */
+ 8909 "00110010" // /* MW 7 */
+ 8910 "11110011" // /* MW 6 */
+ 8911 "00000001" // /* MW 5 */
+ 8912 "00000000" // /* MW 4 */
+ 8913 "11010000" // /* MW 3 */
+ 8914 "11001010" // /* MW 2 */
+ 8915 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 61
+.src_ref 8 "superkernels.cpp" 524 44
+ 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8917 "00010000" // /* MW 9 */
+ 8918 "01101010" // /* MW 8 */
+ 8919 "00110010" // /* MW 7 */
+ 8920 "11110011" // /* MW 6 */
+ 8921 "00000001" // /* MW 5 */
+ 8922 "00000000" // /* MW 4 */
+ 8923 "11010000" // /* MW 3 */
+ 8924 "11000010" // /* MW 2 */
+ 8925 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+.src_ref 8 "superkernels.cpp" 524 44 first
+ 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8927 "00000010" // /* MW 5 */
+ 8928 "01100000" // /* MW 4 */
+ 8929 "11010000" // /* MW 3 */
+ 8930 "11000110" // /* MW 2 */
+ 8931 "11000000" // /* MW 1 */
+ 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8933 "00000000" // /* MW 1 */
+ 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8935 "00000000" // /* MW 1 */
+ 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8937 "00000000" // /* MW 1 */
+ 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8939 "00000000" // /* MW 1 */
+ 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8941 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 523 37 first
+ 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8943 "00001111" // /* MW 3 */
+ 8944 "10100101" // /* MW 2 */
+ 8945 "00010100" // /* MW 1 */
+ 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8947 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30 first
+.src_ref 8 "superkernels.cpp" 524 30 first
+ 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8949 "10000010" // /* MW 5 */
+ 8950 "00110010" // /* MW 4 */
+ 8951 "00111010" // /* MW 3 */
+ 8952 "11100100" // /* MW 2 */
+ 8953 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8955 "00011100" // /* MW 3 */
+ 8956 "00110111" // /* MW 2 */
+ 8957 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8959 "00000010" // /* MW 3 */
+ 8960 "11100111" // /* MW 2 */
+ 8961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 42
+ 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8963 "00011100" // /* MW 3 */
+ 8964 "10110111" // /* MW 2 */
+ 8965 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 524 30
+ 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8967 "00110010" // /* MW 3 */
+ 8968 "00100011" // /* MW 2 */
+ 8969 "00010110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 65 first
+ 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8971 "00010001" // /* MW 3 */
+ 8972 "00100101" // /* MW 2 */
+ 8973 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 526 36 first
+ 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8975 "00001000" // /* MW 3 */
+ 8976 "01100001" // /* MW 2 */
+ 8977 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 8979 "00000001" // /* MW 5 */
+ 8980 "01000000" // /* MW 4 */
+ 8981 "01000000" // /* MW 3 */
+ 8982 "00010010" // /* MW 2 */
+ 8983 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32
+.delay_slot
+ 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8985 "00100000" // /* MW 5 */
+ 8986 "11001010" // /* MW 4 */
+ 8987 "11001100" // /* MW 3 */
+ 8988 "00000111" // /* MW 2 */
+ 8989 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 525 32 first
+.delay_slot
+ 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8991 "01010001" // /* MW 3 */
+ 8992 "00000110" // /* MW 2 */
+ 8993 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8999 "00000000" // /* MW 1 */
+ 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9001 "00000000" // /* MW 5 */
+ 9002 "00000000" // /* MW 4 */
+ 9003 "11111000" // /* MW 3 */
+ 9004 "00010001" // /* MW 2 */
+ 9005 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9007 "00010000" // /* MW 9 */
+ 9008 "01101000" // /* MW 8 */
+ 9009 "10110010" // /* MW 7 */
+ 9010 "11110011" // /* MW 6 */
+ 9011 "00000001" // /* MW 5 */
+ 9012 "00000000" // /* MW 4 */
+ 9013 "00000000" // /* MW 3 */
+ 9014 "01001110" // /* MW 2 */
+ 9015 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9017 "00010000" // /* MW 9 */
+ 9018 "00100000" // /* MW 8 */
+ 9019 "00110010" // /* MW 7 */
+ 9020 "11110001" // /* MW 6 */
+ 9021 "00000001" // /* MW 5 */
+ 9022 "00000000" // /* MW 4 */
+ 9023 "00000000" // /* MW 3 */
+ 9024 "00101111" // /* MW 2 */
+ 9025 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9027 "00000001" // /* MW 3 */
+ 9028 "00011010" // /* MW 2 */
+ 9029 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9031 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9033 "00011100" // /* MW 7 */
+ 9034 "00000000" // /* MW 6 */
+ 9035 "00000000" // /* MW 5 */
+ 9036 "00000100" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504
+ 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */
+ 9041 "00000000" // /* MW 5 */
+ 9042 "00000000" // /* MW 4 */
+ 9043 "11111000" // /* MW 3 */
+ 9044 "00010001" // /* MW 2 */
+ 9045 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.src_ref 8 "superkernels.cpp" 558 19
+.delay_slot
+ 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9047 "00010000" // /* MW 9 */
+ 9048 "01101000" // /* MW 8 */
+ 9049 "10110010" // /* MW 7 */
+ 9050 "11110011" // /* MW 6 */
+ 9051 "00000001" // /* MW 5 */
+ 9052 "00000000" // /* MW 4 */
+ 9053 "00000000" // /* MW 3 */
+ 9054 "01001110" // /* MW 2 */
+ 9055 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9057 "00010000" // /* MW 9 */
+ 9058 "00100000" // /* MW 8 */
+ 9059 "00110010" // /* MW 7 */
+ 9060 "11110001" // /* MW 6 */
+ 9061 "00000001" // /* MW 5 */
+ 9062 "00000000" // /* MW 4 */
+ 9063 "00000000" // /* MW 3 */
+ 9064 "00101111" // /* MW 2 */
+ 9065 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9067 "00000001" // /* MW 3 */
+ 9068 "00011010" // /* MW 2 */
+ 9069 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9073 "00000000" // /* MW 15 */
+ 9074 "00000000" // /* MW 14 */
+ 9075 "01111000" // /* MW 13 */
+ 9076 "10100101" // /* MW 12 */
+ 9077 "00000001" // /* MW 11 */
+ 9078 "00000000" // /* MW 10 */
+ 9079 "00000000" // /* MW 9 */
+ 9080 "00000000" // /* MW 8 */
+ 9081 "01011011" // /* MW 7 */
+ 9082 "00000001" // /* MW 6 */
+ 9083 "00100000" // /* MW 5 */
+ 9084 "00000000" // /* MW 4 */
+ 9085 "11110000" // /* MW 3 */
+ 9086 "00101100" // /* MW 2 */
+ 9087 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552
+.src_ref 8 "superkernels.cpp" 532 27
+.src_ref 8 "superkernels.cpp" 533 31
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+ 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9089 "00010000" // /* MW 9 */
+ 9090 "01110010" // /* MW 8 */
+ 9091 "00110010" // /* MW 7 */
+ 9092 "11110011" // /* MW 6 */
+ 9093 "00000001" // /* MW 5 */
+ 9094 "00000000" // /* MW 4 */
+ 9095 "00000000" // /* MW 3 */
+ 9096 "00001101" // /* MW 2 */
+ 9097 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 27 first
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 552 2
+ 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9099 "00010000" // /* MW 9 */
+ 9100 "00100000" // /* MW 8 */
+ 9101 "00110010" // /* MW 7 */
+ 9102 "11110001" // /* MW 6 */
+ 9103 "00000001" // /* MW 5 */
+ 9104 "00000000" // /* MW 4 */
+ 9105 "11010000" // /* MW 3 */
+ 9106 "11001010" // /* MW 2 */
+ 9107 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 63
+.src_ref 8 "superkernels.cpp" 533 46
+ 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9109 "00010000" // /* MW 9 */
+ 9110 "01101100" // /* MW 8 */
+ 9111 "00110010" // /* MW 7 */
+ 9112 "11110011" // /* MW 6 */
+ 9113 "00000001" // /* MW 5 */
+ 9114 "00000000" // /* MW 4 */
+ 9115 "11010000" // /* MW 3 */
+ 9116 "11000010" // /* MW 2 */
+ 9117 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 46 first
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9119 "00001010" // /* MW 5 */
+ 9120 "00111100" // /* MW 4 */
+ 9121 "11010000" // /* MW 3 */
+ 9122 "11000110" // /* MW 2 */
+ 9123 "11000000" // /* MW 1 */
+ 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9125 "00000000" // /* MW 1 */
+ 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9127 "00000000" // /* MW 1 */
+ 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9129 "00000000" // /* MW 1 */
+ 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9131 "00000000" // /* MW 1 */
+ 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9133 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 532 39 first
+ 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9135 "00001111" // /* MW 3 */
+ 9136 "10100101" // /* MW 2 */
+ 9137 "00010100" // /* MW 1 */
+ 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9139 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31 first
+.src_ref 8 "superkernels.cpp" 533 31 first
+ 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9141 "10000010" // /* MW 5 */
+ 9142 "00110010" // /* MW 4 */
+ 9143 "00111010" // /* MW 3 */
+ 9144 "11100100" // /* MW 2 */
+ 9145 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9147 "00011100" // /* MW 3 */
+ 9148 "00110111" // /* MW 2 */
+ 9149 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9151 "00000010" // /* MW 3 */
+ 9152 "11100111" // /* MW 2 */
+ 9153 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 44
+ 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "00011100" // /* MW 3 */
+ 9156 "10110111" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 533 31
+ 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "00110010" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 67 first
+ 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00010001" // /* MW 3 */
+ 9164 "00100101" // /* MW 2 */
+ 9165 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 535 37 first
+ 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9167 "00001000" // /* MW 3 */
+ 9168 "01100001" // /* MW 2 */
+ 9169 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */
+ 9171 "00000001" // /* MW 5 */
+ 9172 "01000000" // /* MW 4 */
+ 9173 "01000000" // /* MW 3 */
+ 9174 "00010010" // /* MW 2 */
+ 9175 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33
+.delay_slot
+ 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9177 "00110000" // /* MW 5 */
+ 9178 "11001010" // /* MW 4 */
+ 9179 "11001100" // /* MW 3 */
+ 9180 "00000111" // /* MW 2 */
+ 9181 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 534 33 first
+.delay_slot
+ 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9183 "01010001" // /* MW 3 */
+ 9184 "00000110" // /* MW 2 */
+ 9185 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9189 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9191 "00010000" // /* MW 9 */
+ 9192 "01101000" // /* MW 8 */
+ 9193 "10110010" // /* MW 7 */
+ 9194 "11110011" // /* MW 6 */
+ 9195 "00000001" // /* MW 5 */
+ 9196 "00000000" // /* MW 4 */
+ 9197 "11110000" // /* MW 3 */
+ 9198 "00101100" // /* MW 2 */
+ 9199 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 9201 "00100001" // /* MW 9 */
+ 9202 "00000000" // /* MW 8 */
+ 9203 "00000000" // /* MW 7 */
+ 9204 "10011000" // /* MW 6 */
+ 9205 "00000100" // /* MW 5 */
+ 9206 "00000000" // /* MW 4 */
+ 9207 "01100000" // /* MW 3 */
+ 9208 "10000001" // /* MW 2 */
+ 9209 "11010001" // /* MW 1 */
+.delay_slot
+ 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9211 "10010001" // /* MW 3 */
+ 9212 "11100101" // /* MW 2 */
+ 9213 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9217 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9221 "10000001" // /* MW 11 */
+ 9222 "10101101" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "00000000" // /* MW 8 */
+ 9225 "00000000" // /* MW 7 */
+ 9226 "00000000" // /* MW 6 */
+ 9227 "00100000" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696
+.src_ref 8 "superkernels.cpp" 541 26
+ 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10001000" // /* MW 5 */
+ 9234 "11001001" // /* MW 4 */
+ 9235 "11001100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 26 first
+.src_ref 8 "superkernels.cpp" 541 61
+ 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "00100010" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110011" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11001110" // /* MW 2 */
+ 9247 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 61
+.src_ref 8 "superkernels.cpp" 542 44
+ 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "01101110" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110011" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000010" // /* MW 2 */
+ 9257 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 44 first
+ 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9259 "01010110" // /* MW 3 */
+ 9260 "00000110" // /* MW 2 */
+ 9261 "00000110" // /* MW 1 */
+ 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9263 "00000000" // /* MW 1 */
+ 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9265 "00000000" // /* MW 1 */
+ 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9267 "00000000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 541 37 first
+ 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9273 "00001111" // /* MW 3 */
+ 9274 "11100111" // /* MW 2 */
+ 9275 "00010100" // /* MW 1 */
+ 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30 first
+.src_ref 8 "superkernels.cpp" 542 30 first
+ 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "10000010" // /* MW 5 */
+ 9280 "10110011" // /* MW 4 */
+ 9281 "00111010" // /* MW 3 */
+ 9282 "00100110" // /* MW 2 */
+ 9283 "10010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9285 "00101100" // /* MW 3 */
+ 9286 "01110111" // /* MW 2 */
+ 9287 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9289 "00000010" // /* MW 3 */
+ 9290 "00101001" // /* MW 2 */
+ 9291 "00010101" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+.src_ref 8 "superkernels.cpp" 542 42
+ 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9293 "00000001" // /* MW 5 */
+ 9294 "10100000" // /* MW 4 */
+ 9295 "10011000" // /* MW 3 */
+ 9296 "11100101" // /* MW 2 */
+ 9297 "10011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 542 30
+ 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9299 "01000010" // /* MW 3 */
+ 9300 "01100011" // /* MW 2 */
+ 9301 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 69 first
+ 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9303 "00010001" // /* MW 3 */
+ 9304 "00100101" // /* MW 2 */
+ 9305 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 544 38 first
+ 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9307 "00000111" // /* MW 3 */
+ 9308 "01100001" // /* MW 2 */
+ 9309 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 547 6 first
+ 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */
+ 9311 "00000001" // /* MW 5 */
+ 9312 "01000000" // /* MW 4 */
+ 9313 "11100000" // /* MW 3 */
+ 9314 "00010011" // /* MW 2 */
+ 9315 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34
+.delay_slot
+ 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9317 "01000000" // /* MW 5 */
+ 9318 "11001010" // /* MW 4 */
+ 9319 "11001100" // /* MW 3 */
+ 9320 "00000111" // /* MW 2 */
+ 9321 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 543 34 first
+.delay_slot
+ 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9323 "01010001" // /* MW 3 */
+ 9324 "00000110" // /* MW 2 */
+ 9325 "00001110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9327 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9329 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9331 "00011100" // /* MW 13 */
+ 9332 "00000000" // /* MW 12 */
+ 9333 "00000000" // /* MW 11 */
+ 9334 "01010111" // /* MW 10 */
+ 9335 "00011010" // /* MW 9 */
+ 9336 "01000000" // /* MW 8 */
+ 9337 "00000000" // /* MW 7 */
+ 9338 "00000000" // /* MW 6 */
+ 9339 "10110110" // /* MW 5 */
+ 9340 "00000010" // /* MW 4 */
+ 9341 "11110000" // /* MW 3 */
+ 9342 "00101100" // /* MW 2 */
+ 9343 "00000000" // /* MW 1 */
+.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9345 "01011000" // /* MW 11 */
+ 9346 "00000001" // /* MW 10 */
+ 9347 "11101000" // /* MW 9 */
+ 9348 "01001001" // /* MW 8 */
+ 9349 "11100000" // /* MW 7 */
+ 9350 "00000000" // /* MW 6 */
+ 9351 "00001011" // /* MW 5 */
+ 9352 "10001100" // /* MW 4 */
+ 9353 "00100110" // /* MW 3 */
+ 9354 "10000011" // /* MW 2 */
+ 9355 "11110111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9357 "10011001" // /* MW 3 */
+ 9358 "10111100" // /* MW 2 */
+ 9359 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9361 "10010001" // /* MW 3 */
+ 9362 "11100101" // /* MW 2 */
+ 9363 "00000111" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11 first
+.aggressive_scheduled_block_id 7
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */
+ 9365 "00000001" // /* MW 5 */
+ 9366 "00000000" // /* MW 4 */
+ 9367 "00101000" // /* MW 3 */
+ 9368 "00001000" // /* MW 2 */
+ 9369 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9371 "11000000" // /* MW 3 */
+ 9372 "01100000" // /* MW 2 */
+ 9373 "00011111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.delay_slot
+ 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9375 "00000001" // /* MW 3 */
+ 9376 "00011010" // /* MW 2 */
+ 9377 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+.src_ref 3 "pad_3d.h" 287 11
+.delay_slot
+ 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9383 "00010000" // /* MW 9 */
+ 9384 "10000000" // /* MW 8 */
+ 9385 "00110010" // /* MW 7 */
+ 9386 "11110001" // /* MW 6 */
+ 9387 "00000001" // /* MW 5 */
+ 9388 "00000000" // /* MW 4 */
+ 9389 "11110000" // /* MW 3 */
+ 9390 "00101100" // /* MW 2 */
+ 9391 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.src_ref 8 "superkernels.cpp" 552 2
+.return_address
+ 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9393 "00010001" // /* MW 9 */
+ 9394 "00100000" // /* MW 8 */
+ 9395 "00110010" // /* MW 7 */
+ 9396 "11110001" // /* MW 6 */
+ 9397 "00000001" // /* MW 5 */
+ 9398 "00000000" // /* MW 4 */
+ 9399 "01100000" // /* MW 3 */
+ 9400 "10010001" // /* MW 2 */
+ 9401 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+ 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9403 "10100000" // /* MW 5 */
+ 9404 "11001001" // /* MW 4 */
+ 9405 "11001110" // /* MW 3 */
+ 9406 "00000111" // /* MW 2 */
+ 9407 "00000000" // /* MW 1 */
+.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9409 "10011110" // /* MW 3 */
+ 9410 "01011100" // /* MW 2 */
+ 9411 "00000110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2 first
+.no_stack_arguments
+ 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */
+ 9413 "00000001" // /* MW 5 */
+ 9414 "00000000" // /* MW 4 */
+ 9415 "01111000" // /* MW 3 */
+ 9416 "00001001" // /* MW 2 */
+ 9417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9421 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 9427 "00011100" // /* MW 13 */
+ 9428 "00000000" // /* MW 12 */
+ 9429 "00000000" // /* MW 11 */
+ 9430 "01010111" // /* MW 10 */
+ 9431 "00011010" // /* MW 9 */
+ 9432 "01000000" // /* MW 8 */
+ 9433 "00000000" // /* MW 7 */
+ 9434 "00000000" // /* MW 6 */
+ 9435 "10110110" // /* MW 5 */
+ 9436 "00000010" // /* MW 4 */
+ 9437 "11110000" // /* MW 3 */
+ 9438 "00101100" // /* MW 2 */
+ 9439 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7 first
+.return_address
+ 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9441 "00010110" // /* MW 3 */
+ 9442 "00000110" // /* MW 2 */
+ 9443 "00000111" // /* MW 1 */
+ 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9445 "00000000" // /* MW 1 */
+ 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9447 "00000000" // /* MW 1 */
+ 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9449 "00000000" // /* MW 1 */
+ 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9451 "00000000" // /* MW 1 */
+ 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9453 "00000000" // /* MW 1 */
+ 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+ 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9457 "00001000" // /* MW 3 */
+ 9458 "11100011" // /* MW 2 */
+ 9459 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 25
+ 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "01000000" // /* MW 4 */
+ 9463 "11100000" // /* MW 3 */
+ 9464 "00010010" // /* MW 2 */
+ 9465 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 555 15
+ 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9477 "10001000" // /* MW 5 */
+ 9478 "11001001" // /* MW 4 */
+ 9479 "11001110" // /* MW 3 */
+ 9480 "00000111" // /* MW 2 */
+ 9481 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 67
+ 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9483 "00010000" // /* MW 9 */
+ 9484 "00110000" // /* MW 8 */
+ 9485 "00110010" // /* MW 7 */
+ 9486 "11110001" // /* MW 6 */
+ 9487 "00000001" // /* MW 5 */
+ 9488 "00000000" // /* MW 4 */
+ 9489 "11010000" // /* MW 3 */
+ 9490 "11000010" // /* MW 2 */
+ 9491 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 67
+ 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9493 "00111010" // /* MW 3 */
+ 9494 "00000100" // /* MW 2 */
+ 9495 "00000010" // /* MW 1 */
+ 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9497 "00000000" // /* MW 1 */
+ 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9499 "00000000" // /* MW 1 */
+ 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9501 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.no_stack_arguments
+ 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9503 "00000001" // /* MW 5 */
+ 9504 "00000000" // /* MW 4 */
+ 9505 "11111000" // /* MW 3 */
+ 9506 "00010011" // /* MW 2 */
+ 9507 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.delay_slot
+ 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00000111" // /* MW 3 */
+ 9512 "00100000" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 29
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9515 "10110101" // /* MW 5 */
+ 9516 "01101101" // /* MW 4 */
+ 9517 "00111000" // /* MW 3 */
+ 9518 "11000010" // /* MW 2 */
+ 9519 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9521 "01000001" // /* MW 5 */
+ 9522 "10111011" // /* MW 4 */
+ 9523 "00110111" // /* MW 3 */
+ 9524 "01100000" // /* MW 2 */
+ 9525 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.delay_slot
+ 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9527 "00010010" // /* MW 9 */
+ 9528 "00000001" // /* MW 8 */
+ 9529 "00000100" // /* MW 7 */
+ 9530 "00000000" // /* MW 6 */
+ 9531 "01011011" // /* MW 5 */
+ 9532 "00000001" // /* MW 4 */
+ 9533 "11110000" // /* MW 3 */
+ 9534 "00101100" // /* MW 2 */
+ 9535 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9537 "01111000" // /* MW 9 */
+ 9538 "11010000" // /* MW 8 */
+ 9539 "01101011" // /* MW 7 */
+ 9540 "10001111" // /* MW 6 */
+ 9541 "00000001" // /* MW 5 */
+ 9542 "00011011" // /* MW 4 */
+ 9543 "00100000" // /* MW 3 */
+ 9544 "10100011" // /* MW 2 */
+ 9545 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 43
+ 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9547 "00000010" // /* MW 3 */
+ 9548 "11100001" // /* MW 2 */
+ 9549 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 6
+.src_ref 8 "superkernels.cpp" 554 78
+ 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */
+ 9551 "00000001" // /* MW 5 */
+ 9552 "01000000" // /* MW 4 */
+ 9553 "11010000" // /* MW 3 */
+ 9554 "00010010" // /* MW 2 */
+ 9555 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00000101" // /* MW 3 */
+ 9558 "00011110" // /* MW 2 */
+ 9559 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9567 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 555 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9569 "01100011" // /* MW 5 */
+ 9570 "00001011" // /* MW 4 */
+ 9571 "11011110" // /* MW 3 */
+ 9572 "11000010" // /* MW 2 */
+ 9573 "01001010" // /* MW 1 */
+ 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9575 "00000000" // /* MW 1 */
+ 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9577 "00000000" // /* MW 1 */
+ 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9579 "00000000" // /* MW 1 */
+ 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9581 "00000000" // /* MW 1 */
+ 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9583 "00000000" // /* MW 1 */
+ 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9585 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9587 "11111000" // /* MW 3 */
+ 9588 "00010000" // /* MW 2 */
+ 9589 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 7
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9591 "00010000" // /* MW 9 */
+ 9592 "01101000" // /* MW 8 */
+ 9593 "10110010" // /* MW 7 */
+ 9594 "11110011" // /* MW 6 */
+ 9595 "00000001" // /* MW 5 */
+ 9596 "00000000" // /* MW 4 */
+ 9597 "11010000" // /* MW 3 */
+ 9598 "11000010" // /* MW 2 */
+ 9599 "11011100" // /* MW 1 */
+ 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9601 "00000000" // /* MW 1 */
+ 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9603 "00000000" // /* MW 1 */
+ 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */
+ 9605 "00000000" // /* MW 5 */
+ 9606 "00000000" // /* MW 4 */
+ 9607 "11011000" // /* MW 3 */
+ 9608 "00010010" // /* MW 2 */
+ 9609 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9617 "00000001" // /* MW 3 */
+ 9618 "11100001" // /* MW 2 */
+ 9619 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.delay_slot
+ 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9621 "11000001" // /* MW 11 */
+ 9622 "00001000" // /* MW 10 */
+ 9623 "01110011" // /* MW 9 */
+ 9624 "00000011" // /* MW 8 */
+ 9625 "00000000" // /* MW 7 */
+ 9626 "00000000" // /* MW 6 */
+ 9627 "00100000" // /* MW 5 */
+ 9628 "00000000" // /* MW 4 */
+ 9629 "11110000" // /* MW 3 */
+ 9630 "00101100" // /* MW 2 */
+ 9631 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096
+.src_ref 8 "superkernels.cpp" 558 7
+ 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9633 "00000000" // /* MW 15 */
+ 9634 "00000000" // /* MW 14 */
+ 9635 "00010000" // /* MW 13 */
+ 9636 "01101000" // /* MW 12 */
+ 9637 "10110010" // /* MW 11 */
+ 9638 "11110011" // /* MW 10 */
+ 9639 "00000001" // /* MW 9 */
+ 9640 "00000000" // /* MW 8 */
+ 9641 "01011011" // /* MW 7 */
+ 9642 "00000001" // /* MW 6 */
+ 9643 "00100000" // /* MW 5 */
+ 9644 "00000000" // /* MW 4 */
+ 9645 "11110000" // /* MW 3 */
+ 9646 "00101100" // /* MW 2 */
+ 9647 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112
+.src_ref 8 "superkernels.cpp" 558 7 first
+ 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9649 "00000000" // /* MW 15 */
+ 9650 "00000000" // /* MW 14 */
+ 9651 "01111000" // /* MW 13 */
+ 9652 "10100101" // /* MW 12 */
+ 9653 "00000001" // /* MW 11 */
+ 9654 "00000000" // /* MW 10 */
+ 9655 "00000000" // /* MW 9 */
+ 9656 "00000000" // /* MW 8 */
+ 9657 "01011011" // /* MW 7 */
+ 9658 "00000001" // /* MW 6 */
+ 9659 "00100000" // /* MW 5 */
+ 9660 "00000000" // /* MW 4 */
+ 9661 "11010000" // /* MW 3 */
+ 9662 "11000010" // /* MW 2 */
+ 9663 "11100000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128
+.src_ref 8 "superkernels.cpp" 558 43
+ 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00000001" // /* MW 3 */
+ 9666 "00100010" // /* MW 2 */
+ 9667 "00010000" // /* MW 1 */
+ 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9669 "00000000" // /* MW 1 */
+ 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9671 "00000000" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 19
+ 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9679 "00001000" // /* MW 3 */
+ 9680 "10100001" // /* MW 2 */
+ 9681 "00010011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 25
+ 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */
+ 9683 "00000001" // /* MW 5 */
+ 9684 "01000000" // /* MW 4 */
+ 9685 "01001000" // /* MW 3 */
+ 9686 "00010011" // /* MW 2 */
+ 9687 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 559 15
+.delay_slot
+ 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9689 "11000000" // /* MW 5 */
+ 9690 "11001001" // /* MW 4 */
+ 9691 "11001110" // /* MW 3 */
+ 9692 "00000111" // /* MW 2 */
+ 9693 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+.delay_slot
+ 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9695 "11000000" // /* MW 5 */
+ 9696 "11001000" // /* MW 4 */
+ 9697 "11000100" // /* MW 3 */
+ 9698 "00000111" // /* MW 2 */
+ 9699 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9701 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9703 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9705 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+ 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00010110" // /* MW 3 */
+ 9708 "00000110" // /* MW 2 */
+ 9709 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 67
+ 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "00111010" // /* MW 3 */
+ 9712 "00000100" // /* MW 2 */
+ 9713 "00000010" // /* MW 1 */
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9715 "00000000" // /* MW 1 */
+ 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9717 "00000000" // /* MW 1 */
+ 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9719 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.no_stack_arguments
+ 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9721 "00000001" // /* MW 5 */
+ 9722 "00000000" // /* MW 4 */
+ 9723 "11111000" // /* MW 3 */
+ 9724 "00010011" // /* MW 2 */
+ 9725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9727 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.delay_slot
+ 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9729 "00000111" // /* MW 3 */
+ 9730 "00100000" // /* MW 2 */
+ 9731 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 29
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9733 "00110101" // /* MW 5 */
+ 9734 "01101110" // /* MW 4 */
+ 9735 "00111000" // /* MW 3 */
+ 9736 "11000010" // /* MW 2 */
+ 9737 "11100000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9739 "01000001" // /* MW 5 */
+ 9740 "00111011" // /* MW 4 */
+ 9741 "00110111" // /* MW 3 */
+ 9742 "01100000" // /* MW 2 */
+ 9743 "10001100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.delay_slot
+ 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9745 "00000000" // /* MW 15 */
+ 9746 "00000000" // /* MW 14 */
+ 9747 "01111000" // /* MW 13 */
+ 9748 "10100101" // /* MW 12 */
+ 9749 "00000001" // /* MW 11 */
+ 9750 "10010000" // /* MW 10 */
+ 9751 "00001000" // /* MW 9 */
+ 9752 "00100000" // /* MW 8 */
+ 9753 "01011011" // /* MW 7 */
+ 9754 "00000001" // /* MW 6 */
+ 9755 "00100000" // /* MW 5 */
+ 9756 "00000000" // /* MW 4 */
+ 9757 "11110000" // /* MW 3 */
+ 9758 "00101100" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9761 "01111000" // /* MW 9 */
+ 9762 "10010000" // /* MW 8 */
+ 9763 "01101011" // /* MW 7 */
+ 9764 "10001111" // /* MW 6 */
+ 9765 "00000001" // /* MW 5 */
+ 9766 "00011011" // /* MW 4 */
+ 9767 "00100000" // /* MW 3 */
+ 9768 "10010011" // /* MW 2 */
+ 9769 "11111011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 43
+ 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9771 "00000010" // /* MW 3 */
+ 9772 "11100001" // /* MW 2 */
+ 9773 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 558 6
+.src_ref 8 "superkernels.cpp" 558 78
+ 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */
+ 9775 "00000001" // /* MW 5 */
+ 9776 "01000000" // /* MW 4 */
+ 9777 "00111000" // /* MW 3 */
+ 9778 "00010011" // /* MW 2 */
+ 9779 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 7
+.delay_slot
+ 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9781 "10100000" // /* MW 5 */
+ 9782 "11001001" // /* MW 4 */
+ 9783 "11000100" // /* MW 3 */
+ 9784 "00000111" // /* MW 2 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9789 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9791 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9793 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 559 15 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9795 "01100011" // /* MW 5 */
+ 9796 "00001011" // /* MW 4 */
+ 9797 "11011110" // /* MW 3 */
+ 9798 "11000010" // /* MW 2 */
+ 9799 "00101010" // /* MW 1 */
+ 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9801 "00000000" // /* MW 1 */
+ 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9803 "00000000" // /* MW 1 */
+ 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9805 "00000000" // /* MW 1 */
+ 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9807 "00000000" // /* MW 1 */
+ 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9809 "00000000" // /* MW 1 */
+ 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "11111000" // /* MW 3 */
+ 9814 "00010000" // /* MW 2 */
+ 9815 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "00010110" // /* MW 3 */
+ 9818 "11100110" // /* MW 2 */
+ 9819 "00000110" // /* MW 1 */
+ 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9821 "00000000" // /* MW 1 */
+ 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9823 "00000000" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00000001" // /* MW 3 */
+ 9834 "11100001" // /* MW 2 */
+ 9835 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9837 "00010001" // /* MW 3 */
+ 9838 "11100110" // /* MW 2 */
+ 9839 "00001110" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304
+ 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */
+ 9841 "00000000" // /* MW 5 */
+ 9842 "00000000" // /* MW 4 */
+ 9843 "01010000" // /* MW 3 */
+ 9844 "00010011" // /* MW 2 */
+ 9845 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.delay_slot
+ 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9847 "11000000" // /* MW 3 */
+ 9848 "01100010" // /* MW 2 */
+ 9849 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9857 "00000000" // /* MW 15 */
+ 9858 "00000000" // /* MW 14 */
+ 9859 "01111000" // /* MW 13 */
+ 9860 "10100101" // /* MW 12 */
+ 9861 "00000001" // /* MW 11 */
+ 9862 "00000000" // /* MW 10 */
+ 9863 "00000000" // /* MW 9 */
+ 9864 "00000000" // /* MW 8 */
+ 9865 "01011011" // /* MW 7 */
+ 9866 "00000001" // /* MW 6 */
+ 9867 "00100000" // /* MW 5 */
+ 9868 "00000000" // /* MW 4 */
+ 9869 "11110000" // /* MW 3 */
+ 9870 "00101100" // /* MW 2 */
+ 9871 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336
+.src_ref 8 "superkernels.cpp" 562 7
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9873 "00000000" // /* MW 15 */
+ 9874 "00000000" // /* MW 14 */
+ 9875 "00010000" // /* MW 13 */
+ 9876 "01101000" // /* MW 12 */
+ 9877 "00110010" // /* MW 11 */
+ 9878 "11110001" // /* MW 10 */
+ 9879 "00000001" // /* MW 9 */
+ 9880 "00000000" // /* MW 8 */
+ 9881 "01011011" // /* MW 7 */
+ 9882 "00000001" // /* MW 6 */
+ 9883 "00100000" // /* MW 5 */
+ 9884 "00000000" // /* MW 4 */
+ 9885 "00100000" // /* MW 3 */
+ 9886 "11110011" // /* MW 2 */
+ 9887 "11111011" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352
+.src_ref 8 "superkernels.cpp" 562 7 first
+.src_ref 8 "superkernels.cpp" 562 19
+ 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00100010" // /* MW 5 */
+ 9890 "01000100" // /* MW 4 */
+ 9891 "11010000" // /* MW 3 */
+ 9892 "11000010" // /* MW 2 */
+ 9893 "01000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 19
+ 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9907 "00001000" // /* MW 3 */
+ 9908 "01100001" // /* MW 2 */
+ 9909 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 25
+ 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9911 "00000001" // /* MW 5 */
+ 9912 "01000000" // /* MW 4 */
+ 9913 "10101000" // /* MW 3 */
+ 9914 "00010011" // /* MW 2 */
+ 9915 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11001000" // /* MW 5 */
+ 9918 "11001001" // /* MW 4 */
+ 9919 "11000100" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9929 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 68
+ 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9931 "00010000" // /* MW 9 */
+ 9932 "00110000" // /* MW 8 */
+ 9933 "10110010" // /* MW 7 */
+ 9934 "11110000" // /* MW 6 */
+ 9935 "00000001" // /* MW 5 */
+ 9936 "00000000" // /* MW 4 */
+ 9937 "11010000" // /* MW 3 */
+ 9938 "11000010" // /* MW 2 */
+ 9939 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 68
+ 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9941 "00111010" // /* MW 3 */
+ 9942 "00000100" // /* MW 2 */
+ 9943 "00000001" // /* MW 1 */
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+ 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9947 "00000000" // /* MW 1 */
+ 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9949 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.no_stack_arguments
+ 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */
+ 9951 "00000001" // /* MW 5 */
+ 9952 "00000000" // /* MW 4 */
+ 9953 "11111000" // /* MW 3 */
+ 9954 "00010011" // /* MW 2 */
+ 9955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9957 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.delay_slot
+ 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9959 "00000111" // /* MW 3 */
+ 9960 "00100000" // /* MW 2 */
+ 9961 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 29
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9963 "10110101" // /* MW 5 */
+ 9964 "01101101" // /* MW 4 */
+ 9965 "00111000" // /* MW 3 */
+ 9966 "11000010" // /* MW 2 */
+ 9967 "01000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9969 "01000001" // /* MW 5 */
+ 9970 "00111011" // /* MW 4 */
+ 9971 "00110111" // /* MW 3 */
+ 9972 "01100000" // /* MW 2 */
+ 9973 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.delay_slot
+ 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9975 "00010010" // /* MW 9 */
+ 9976 "00000001" // /* MW 8 */
+ 9977 "00000100" // /* MW 7 */
+ 9978 "00000000" // /* MW 6 */
+ 9979 "01011011" // /* MW 5 */
+ 9980 "00000001" // /* MW 4 */
+ 9981 "11110000" // /* MW 3 */
+ 9982 "00101100" // /* MW 2 */
+ 9983 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 562 44
+.return_address
+ 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9985 "01000001" // /* MW 5 */
+ 9986 "10101110" // /* MW 4 */
+ 9987 "00111101" // /* MW 3 */
+ 9988 "00000110" // /* MW 2 */
+ 9989 "01101100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 44
+ 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9991 "00000010" // /* MW 3 */
+ 9992 "11100001" // /* MW 2 */
+ 9993 "00010000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 562 6
+.src_ref 8 "superkernels.cpp" 562 79
+ 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */
+ 9995 "00000001" // /* MW 5 */
+ 9996 "01000000" // /* MW 4 */
+ 9997 "10101000" // /* MW 3 */
+ 9998 "00010011" // /* MW 2 */
+ 9999 "10000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16
+.delay_slot
+ 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10001 "11001000" // /* MW 5 */
+ 10002 "11001001" // /* MW 4 */
+ 10003 "11000100" // /* MW 3 */
+ 10004 "00000111" // /* MW 2 */
+ 10005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10013 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 563 16 first
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "01100011" // /* MW 5 */
+ 10016 "00001011" // /* MW 4 */
+ 10017 "11010100" // /* MW 3 */
+ 10018 "11000010" // /* MW 2 */
+ 10019 "11101010" // /* MW 1 */
+ 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10021 "00000000" // /* MW 1 */
+ 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10023 "00000000" // /* MW 1 */
+ 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10025 "00000000" // /* MW 1 */
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10033 "11111000" // /* MW 3 */
+ 10034 "00010000" // /* MW 2 */
+ 10035 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10037 "00010110" // /* MW 3 */
+ 10038 "11100110" // /* MW 2 */
+ 10039 "00000110" // /* MW 1 */
+ 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10041 "00000000" // /* MW 1 */
+ 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10043 "00000000" // /* MW 1 */
+ 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10045 "00000000" // /* MW 1 */
+ 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10047 "00000000" // /* MW 1 */
+ 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10049 "00000000" // /* MW 1 */
+ 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10053 "00000001" // /* MW 3 */
+ 10054 "11100001" // /* MW 2 */
+ 10055 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10057 "01110000" // /* MW 7 */
+ 10058 "10100101" // /* MW 6 */
+ 10059 "00000001" // /* MW 5 */
+ 10060 "00000000" // /* MW 4 */
+ 10061 "00110000" // /* MW 3 */
+ 10062 "11000010" // /* MW 2 */
+ 10063 "11011100" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528
+.src_ref 8 "superkernels.cpp" 566 6
+.src_ref 8 "superkernels.cpp" 567 14
+ 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10065 "10000000" // /* MW 5 */
+ 10066 "11001001" // /* MW 4 */
+ 10067 "11001100" // /* MW 3 */
+ 10068 "00000111" // /* MW 2 */
+ 10069 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6 first
+.src_ref 8 "superkernels.cpp" 566 19
+ 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10071 "00010000" // /* MW 9 */
+ 10072 "01110100" // /* MW 8 */
+ 10073 "00110010" // /* MW 7 */
+ 10074 "11110001" // /* MW 6 */
+ 10075 "00000001" // /* MW 5 */
+ 10076 "00000000" // /* MW 4 */
+ 10077 "11010000" // /* MW 3 */
+ 10078 "11000010" // /* MW 2 */
+ 10079 "11000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 19
+ 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10081 "00110110" // /* MW 3 */
+ 10082 "00000110" // /* MW 2 */
+ 10083 "00000010" // /* MW 1 */
+ 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10085 "00000000" // /* MW 1 */
+ 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10087 "00000000" // /* MW 1 */
+ 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10089 "00000000" // /* MW 1 */
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+ 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10093 "00000000" // /* MW 1 */
+ 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10095 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 16
+ 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10097 "00001000" // /* MW 3 */
+ 10098 "01100001" // /* MW 2 */
+ 10099 "00010100" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 566 6
+ 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */
+ 10101 "00000001" // /* MW 5 */
+ 10102 "01000000" // /* MW 4 */
+ 10103 "11001000" // /* MW 3 */
+ 10104 "00010011" // /* MW 2 */
+ 10105 "10000000" // /* MW 1 */
+.delay_slot
+ 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10107 "10011001" // /* MW 3 */
+ 10108 "11101111" // /* MW 2 */
+ 10109 "00000111" // /* MW 1 */
+.delay_slot
+ 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10111 "11110001" // /* MW 3 */
+ 10112 "11110001" // /* MW 2 */
+ 10113 "00000111" // /* MW 1 */
+.delay_slot
+ 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10115 "11010001" // /* MW 3 */
+ 10116 "11110101" // /* MW 2 */
+ 10117 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 567 14 first
+ 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100011" // /* MW 5 */
+ 10124 "00001011" // /* MW 4 */
+ 10125 "11111100" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592
+.src_ref 8 "superkernels.cpp" 569
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10129 "01000001" // /* MW 5 */
+ 10130 "11101011" // /* MW 4 */
+ 10131 "00101110" // /* MW 3 */
+ 10132 "00101110" // /* MW 2 */
+ 10133 "11111111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10135 "10010001" // /* MW 3 */
+ 10136 "11111101" // /* MW 2 */
+ 10137 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10139 "10110001" // /* MW 3 */
+ 10140 "11101001" // /* MW 2 */
+ 10141 "00000111" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10143 "00000000" // /* MW 3 */
+ 10144 "00101000" // /* MW 2 */
+ 10145 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10147 "00100000" // /* MW 3 */
+ 10148 "01100110" // /* MW 2 */
+ 10149 "00011110" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 569
+.delay_slot
+ 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10151 "00000001" // /* MW 5 */
+ 10152 "00000000" // /* MW 4 */
+ 10153 "00000000" // /* MW 3 */
+ 10154 "11110000" // /* MW 2 */
+ 10155 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10157 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10159 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10161 "00000000" // /* MW 15 */
+ 10162 "00000000" // /* MW 14 */
+ 10163 "01111000" // /* MW 13 */
+ 10164 "10100101" // /* MW 12 */
+ 10165 "00000001" // /* MW 11 */
+ 10166 "00000000" // /* MW 10 */
+ 10167 "00000000" // /* MW 9 */
+ 10168 "00000000" // /* MW 8 */
+ 10169 "01011011" // /* MW 7 */
+ 10170 "00000001" // /* MW 6 */
+ 10171 "00100000" // /* MW 5 */
+ 10172 "00000000" // /* MW 4 */
+ 10173 "11110000" // /* MW 3 */
+ 10174 "00101100" // /* MW 2 */
+ 10175 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640
+.src_ref 8 "superkernels.cpp" 554 43
+.src_ref 8 "superkernels.cpp" 555 15
+.src_ref 8 "superkernels.cpp" 558 43
+.src_ref 8 "superkernels.cpp" 559 15
+.src_ref 8 "superkernels.cpp" 562 44
+.src_ref 8 "superkernels.cpp" 563 16
+.src_ref 8 "superkernels.cpp" 567 14
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */
+ 10177 "00100000" // /* MW 11 */
+ 10178 "00000000" // /* MW 10 */
+ 10179 "00000000" // /* MW 9 */
+ 10180 "10011000" // /* MW 8 */
+ 10181 "00000100" // /* MW 7 */
+ 10182 "00000000" // /* MW 6 */
+ 10183 "00001011" // /* MW 5 */
+ 10184 "10001100" // /* MW 4 */
+ 10185 "00000110" // /* MW 3 */
+ 10186 "00001101" // /* MW 2 */
+ 10187 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 19
+.src_ref 8 "superkernels.cpp" 558 19
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.delay_slot
+ 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10189 "00001001" // /* MW 5 */
+ 10190 "00100000" // /* MW 4 */
+ 10191 "10100111" // /* MW 3 */
+ 10192 "11000000" // /* MW 2 */
+ 10193 "00000011" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 552 2
+.delay_slot
+ 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10195 "10000000" // /* MW 5 */
+ 10196 "11001000" // /* MW 4 */
+ 10197 "11000100" // /* MW 3 */
+ 10198 "00000111" // /* MW 2 */
+ 10199 "00000000" // /* MW 1 */
+.src_ref 8 "superkernels.cpp" 554 7
+.delay_slot
+ 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10201 "10100000" // /* MW 5 */
+ 10202 "11001001" // /* MW 4 */
+ 10203 "11001110" // /* MW 3 */
+ 10204 "00000111" // /* MW 2 */
+ 10205 "00000000" // /* MW 1 */
+.delay_slot
+ 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10207 "10010001" // /* MW 3 */
+ 10208 "11100101" // /* MW 2 */
+ 10209 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10211 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 108 19
+.src_ref 9 "me_div.c" 115 4 first
+.function_start
+ 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10225 "01000001" // /* MW 5 */
+ 10226 "10100000" // /* MW 4 */
+ 10227 "00101111" // /* MW 3 */
+ 10228 "11000000" // /* MW 2 */
+ 10229 "00000000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10231 "00011100" // /* MW 3 */
+ 10232 "11000110" // /* MW 2 */
+ 10233 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10235 "00011100" // /* MW 3 */
+ 10236 "11000110" // /* MW 2 */
+ 10237 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10239 "00011100" // /* MW 3 */
+ 10240 "11000110" // /* MW 2 */
+ 10241 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10243 "00011100" // /* MW 3 */
+ 10244 "11000110" // /* MW 2 */
+ 10245 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10247 "00011100" // /* MW 3 */
+ 10248 "11000110" // /* MW 2 */
+ 10249 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10251 "00011100" // /* MW 3 */
+ 10252 "11000110" // /* MW 2 */
+ 10253 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10255 "00011100" // /* MW 3 */
+ 10256 "11000110" // /* MW 2 */
+ 10257 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10259 "00011100" // /* MW 3 */
+ 10260 "11000110" // /* MW 2 */
+ 10261 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10263 "00011100" // /* MW 3 */
+ 10264 "11000110" // /* MW 2 */
+ 10265 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10267 "00011100" // /* MW 3 */
+ 10268 "11000110" // /* MW 2 */
+ 10269 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10271 "00011100" // /* MW 3 */
+ 10272 "11000110" // /* MW 2 */
+ 10273 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10275 "00011100" // /* MW 3 */
+ 10276 "11000110" // /* MW 2 */
+ 10277 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10279 "00011100" // /* MW 3 */
+ 10280 "11000110" // /* MW 2 */
+ 10281 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10283 "00011100" // /* MW 3 */
+ 10284 "11000110" // /* MW 2 */
+ 10285 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10287 "00011100" // /* MW 3 */
+ 10288 "11000110" // /* MW 2 */
+ 10289 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10291 "00011100" // /* MW 3 */
+ 10292 "11000110" // /* MW 2 */
+ 10293 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10295 "00011100" // /* MW 3 */
+ 10296 "11000110" // /* MW 2 */
+ 10297 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10299 "00011100" // /* MW 3 */
+ 10300 "11000110" // /* MW 2 */
+ 10301 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10303 "00011100" // /* MW 3 */
+ 10304 "11000110" // /* MW 2 */
+ 10305 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10307 "00011100" // /* MW 3 */
+ 10308 "11000110" // /* MW 2 */
+ 10309 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "00011100" // /* MW 3 */
+ 10312 "11000110" // /* MW 2 */
+ 10313 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10315 "00011100" // /* MW 3 */
+ 10316 "11000110" // /* MW 2 */
+ 10317 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10319 "00011100" // /* MW 3 */
+ 10320 "11000110" // /* MW 2 */
+ 10321 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10323 "00011100" // /* MW 3 */
+ 10324 "11000110" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "00011100" // /* MW 3 */
+ 10328 "11000110" // /* MW 2 */
+ 10329 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "00011100" // /* MW 3 */
+ 10332 "11000110" // /* MW 2 */
+ 10333 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10335 "00011100" // /* MW 3 */
+ 10336 "11000110" // /* MW 2 */
+ 10337 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+ 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10339 "00011100" // /* MW 3 */
+ 10340 "11000110" // /* MW 2 */
+ 10341 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 119 first
+ 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10343 "00000000" // /* MW 3 */
+ 10344 "00101000" // /* MW 2 */
+ 10345 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19 first
+.delay_slot
+ 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10347 "00011100" // /* MW 3 */
+ 10348 "11000110" // /* MW 2 */
+ 10349 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10351 "00011100" // /* MW 3 */
+ 10352 "11000110" // /* MW 2 */
+ 10353 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10355 "00011100" // /* MW 3 */
+ 10356 "11000110" // /* MW 2 */
+ 10357 "00010000" // /* MW 1 */
+.src_ref 9 "me_div.c" 108 19
+.delay_slot
+ 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10359 "00011100" // /* MW 3 */
+ 10360 "11000110" // /* MW 2 */
+ 10361 "00010000" // /* MW 1 */
+.delay_slot
+ 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10363 "10100000" // /* MW 3 */
+ 10364 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 10365 "00011000" // /* MW 1 */
+.label _ZL19propagateFloat32NaNjj
+.function propagateFloat32NaN _ZL19propagateFloat32NaNjj
+.src_ref 10 "softfloat-specialize" 78 24
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 143 4 first
+.function_start
+ 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10369 "00010000" // /* MW 9 */
+ 10370 "00000000" // /* MW 8 */
+ 10371 "01001000" // /* MW 7 */
+ 10372 "00000010" // /* MW 6 */
+ 10373 "11000000" // /* MW 5 */
+ 10374 "00111111" // /* MW 4 */
+ 10375 "00000000" // /* MW 3 */
+ 10376 "01000011" // /* MW 2 */
+ 10377 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6
+.src_ref 10 "softfloat-specialize" 141 6
+ 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10379 "00010000" // /* MW 9 */
+ 10380 "00000000" // /* MW 8 */
+ 10381 "00001000" // /* MW 7 */
+ 10382 "00000000" // /* MW 6 */
+ 10383 "00010000" // /* MW 5 */
+ 10384 "00000000" // /* MW 4 */
+ 10385 "00000000" // /* MW 3 */
+ 10386 "11100111" // /* MW 2 */
+ 10387 "00111111" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+.src_ref 10 "softfloat-specialize" 140 6 first
+ 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10389 "01011000" // /* MW 9 */
+ 10390 "11111110" // /* MW 8 */
+ 10391 "10101001" // /* MW 7 */
+ 10392 "00101100" // /* MW 6 */
+ 10393 "01000000" // /* MW 5 */
+ 10394 "00000010" // /* MW 4 */
+ 10395 "00000000" // /* MW 3 */
+ 10396 "00110000" // /* MW 2 */
+ 10397 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 141 6 first
+ 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10399 "00000101" // /* MW 3 */
+ 10400 "10000000" // /* MW 2 */
+ 10401 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10403 "00111101" // /* MW 3 */
+ 10404 "01001100" // /* MW 2 */
+ 10405 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10407 "00111101" // /* MW 3 */
+ 10408 "10000110" // /* MW 2 */
+ 10409 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10411 "00110100" // /* MW 3 */
+ 10412 "11000110" // /* MW 2 */
+ 10413 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10415 "01100100" // /* MW 3 */
+ 10416 "11001100" // /* MW 2 */
+ 10417 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10419 "01100111" // /* MW 3 */
+ 10420 "01001100" // /* MW 2 */
+ 10421 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 38 first
+ 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10423 "00001101" // /* MW 3 */
+ 10424 "10100011" // /* MW 2 */
+ 10425 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 78 24
+ 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10427 "00011100" // /* MW 3 */
+ 10428 "10110111" // /* MW 2 */
+ 10429 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 62 first
+ 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10431 "00000010" // /* MW 3 */
+ 10432 "00100010" // /* MW 2 */
+ 10433 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+.src_ref 10 "softfloat-specialize" 139 22
+ 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10435 "11111110" // /* MW 5 */
+ 10436 "00111111" // /* MW 4 */
+ 10437 "11111000" // /* MW 3 */
+ 10438 "00111111" // /* MW 2 */
+ 10439 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+ 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10441 "00000100" // /* MW 3 */
+ 10442 "10000101" // /* MW 2 */
+ 10443 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22
+ 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10445 "11110000" // /* MW 3 */
+ 10446 "10000100" // /* MW 2 */
+ 10447 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+ 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10449 "00000100" // /* MW 3 */
+ 10450 "01000011" // /* MW 2 */
+ 10451 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22
+ 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10453 "11110000" // /* MW 3 */
+ 10454 "01000010" // /* MW 2 */
+ 10455 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 4 first
+ 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10457 "00000000" // /* MW 3 */
+ 10458 "00101000" // /* MW 2 */
+ 10459 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 137 22 first
+.delay_slot
+ 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10461 "01100100" // /* MW 3 */
+ 10462 "01110110" // /* MW 2 */
+ 10463 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10465 "01010111" // /* MW 3 */
+ 10466 "11000010" // /* MW 2 */
+ 10467 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 49 first
+.delay_slot
+ 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10469 "01000010" // /* MW 3 */
+ 10470 "01000110" // /* MW 2 */
+ 10471 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 139 22 first
+.delay_slot
+ 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10473 "00100100" // /* MW 3 */
+ 10474 "01110110" // /* MW 2 */
+ 10475 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-specialize" 143 27 first
+.delay_slot
+ 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10477 "00000010" // /* MW 3 */
+ 10478 "11000000" // /* MW 2 */
+.label _ZL19propagateFloat32NaNjj__end
+ 10479 "00010000" // /* MW 1 */
+.label _ZL19roundAndPackFloat32iij
+.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 154 first
+.src_ref 10 "softfloat.c" 161 19
+.src_ref 10 "softfloat.c" 203 30
+.function_start
+ 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10481 "00010000" // /* MW 9 */
+ 10482 "01111010" // /* MW 8 */
+ 10483 "00110010" // /* MW 7 */
+ 10484 "11110000" // /* MW 6 */
+ 10485 "00000001" // /* MW 5 */
+ 10486 "00000000" // /* MW 4 */
+ 10487 "00000000" // /* MW 3 */
+ 10488 "00000000" // /* MW 2 */
+ 10489 "00001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 161 19 first
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 178 21
+.src_ref 10 "softfloat.c" 194 29
+ 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10491 "11111010" // /* MW 5 */
+ 10492 "10011001" // /* MW 4 */
+ 10493 "11010000" // /* MW 3 */
+ 10494 "10010010" // /* MW 2 */
+ 10495 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10501 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10503 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10505 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10507 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 162 36 first
+.src_ref 10 "softfloat.c" 164 4 first
+ 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */
+ 10509 "00000001" // /* MW 5 */
+ 10510 "00000000" // /* MW 4 */
+ 10511 "10101000" // /* MW 3 */
+ 10512 "00010100" // /* MW 2 */
+ 10513 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 40
+.src_ref 10 "softfloat.c" 185 68
+.src_ref 10 "softfloat.c" 202 18
+.delay_slot
+ 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10515 "00000001" // /* MW 3 */
+ 10516 "01001010" // /* MW 2 */
+ 10517 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10525 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+.src_ref 10 "softfloat.c" 171 16
+.src_ref 10 "softfloat.c" 171 34
+.src_ref 10 "softfloat.c" 174 16
+.src_ref 10 "softfloat.c" 174 34
+ 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10527 "01011000" // /* MW 9 */
+ 10528 "00000000" // /* MW 8 */
+ 10529 "00001000" // /* MW 7 */
+ 10530 "01001011" // /* MW 6 */
+ 10531 "01110000" // /* MW 5 */
+ 10532 "00000000" // /* MW 4 */
+ 10533 "00000000" // /* MW 3 */
+ 10534 "01110000" // /* MW 2 */
+ 10535 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26
+.src_ref 10 "softfloat.c" 171 34 first
+ 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10537 "00000101" // /* MW 5 */
+ 10538 "10100000" // /* MW 4 */
+ 10539 "11110010" // /* MW 3 */
+ 10540 "11001000" // /* MW 2 */
+ 10541 "00111110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 171 16
+ 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10543 "10000010" // /* MW 3 */
+ 10544 "10001111" // /* MW 2 */
+ 10545 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 174 34 first
+ 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10547 "00000111" // /* MW 3 */
+ 10548 "00110111" // /* MW 2 */
+ 10549 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12
+.src_ref 10 "softfloat.c" 174 16
+ 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10551 "01000001" // /* MW 5 */
+ 10552 "10100001" // /* MW 4 */
+ 10553 "01001101" // /* MW 3 */
+ 10554 "00110000" // /* MW 2 */
+ 10555 "00110100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 170 12 first
+.src_ref 10 "softfloat.c" 170 12 first
+ 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10557 "01110010" // /* MW 3 */
+ 10558 "00001110" // /* MW 2 */
+ 10559 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 26 first
+ 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10561 "01000111" // /* MW 3 */
+ 10562 "01110110" // /* MW 2 */
+ 10563 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 165 8
+ 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10565 "10000001" // /* MW 11 */
+ 10566 "10101101" // /* MW 10 */
+ 10567 "00000000" // /* MW 9 */
+ 10568 "00010000" // /* MW 8 */
+ 10569 "01011100" // /* MW 7 */
+ 10570 "00001110" // /* MW 6 */
+ 10571 "00100000" // /* MW 5 */
+ 10572 "00000000" // /* MW 4 */
+ 10573 "11110000" // /* MW 3 */
+ 10574 "00101100" // /* MW 2 */
+ 10575 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_96
+.src_ref 10 "softfloat.c" 179 14
+.src_ref 10 "softfloat.c" 179 17 first
+.src_ref 10 "softfloat.c" 180 23
+.src_ref 10 "softfloat.c" 181 28
+ 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10577 "11110101" // /* MW 5 */
+ 10578 "00100011" // /* MW 4 */
+ 10579 "00001000" // /* MW 3 */
+ 10580 "10010110" // /* MW 2 */
+ 10581 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 14
+ 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10583 "00001010" // /* MW 3 */
+ 10584 "10100101" // /* MW 2 */
+ 10585 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 179 4
+ 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */
+ 10587 "00000001" // /* MW 5 */
+ 10588 "01000000" // /* MW 4 */
+ 10589 "00001000" // /* MW 3 */
+ 10590 "00010101" // /* MW 2 */
+ 10591 "10010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 178 21 first
+.delay_slot
+ 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10593 "01100100" // /* MW 3 */
+ 10594 "11100010" // /* MW 2 */
+ 10595 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.delay_slot
+ 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10597 "01111101" // /* MW 3 */
+ 10598 "00001110" // /* MW 2 */
+ 10599 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10601 "01111101" // /* MW 3 */
+ 10602 "01000010" // /* MW 2 */
+ 10603 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10607 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 28 first
+.src_ref 10 "softfloat.c" 182 40 first
+.src_ref 10 "softfloat.c" 182 59
+ 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10609 "10101000" // /* MW 9 */
+ 10610 "11001010" // /* MW 8 */
+ 10611 "10001000" // /* MW 7 */
+ 10612 "00111110" // /* MW 6 */
+ 10613 "00111000" // /* MW 5 */
+ 10614 "00000101" // /* MW 4 */
+ 10615 "00000000" // /* MW 3 */
+ 10616 "00010010" // /* MW 2 */
+ 10617 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 59
+ 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10619 "00101010" // /* MW 3 */
+ 10620 "00101001" // /* MW 2 */
+ 10621 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 23 first
+ 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10623 "00101010" // /* MW 3 */
+ 10624 "00100000" // /* MW 2 */
+ 10625 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 182 18 first
+ 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10627 "01000100" // /* MW 3 */
+ 10628 "11100111" // /* MW 2 */
+ 10629 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 181 13 first
+ 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10631 "00000101" // /* MW 3 */
+ 10632 "11100111" // /* MW 2 */
+ 10633 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 180 8 first
+ 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */
+ 10635 "00000001" // /* MW 5 */
+ 10636 "01000000" // /* MW 4 */
+ 10637 "00110000" // /* MW 3 */
+ 10638 "00010101" // /* MW 2 */
+ 10639 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 187 18
+.src_ref 10 "softfloat.c" 192 39
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10641 "00000001" // /* MW 3 */
+ 10642 "00100000" // /* MW 2 */
+ 10643 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10651 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 18 first
+ 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10653 "00001001" // /* MW 3 */
+ 10654 "10100111" // /* MW 2 */
+ 10655 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 187 8
+ 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */
+ 10657 "00000001" // /* MW 5 */
+ 10658 "01000000" // /* MW 4 */
+ 10659 "00010000" // /* MW 3 */
+ 10660 "00010101" // /* MW 2 */
+ 10661 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10663 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10671 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 192 39 first
+ 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10673 "00100001" // /* MW 3 */
+ 10674 "00000100" // /* MW 2 */
+ 10675 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */
+ 10677 "00000001" // /* MW 5 */
+ 10678 "00000000" // /* MW 4 */
+ 10679 "11111000" // /* MW 3 */
+ 10680 "00010100" // /* MW 2 */
+ 10681 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10683 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10685 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10687 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10689 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10691 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10693 "10000001" // /* MW 5 */
+ 10694 "10100000" // /* MW 4 */
+ 10695 "00111001" // /* MW 3 */
+ 10696 "01000100" // /* MW 2 */
+ 10697 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10699 "00010100" // /* MW 3 */
+ 10700 "11001111" // /* MW 2 */
+ 10701 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10703 "01111101" // /* MW 3 */
+ 10704 "11001110" // /* MW 2 */
+ 10705 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10707 "00011101" // /* MW 3 */
+ 10708 "11100011" // /* MW 2 */
+ 10709 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10711 "00111010" // /* MW 3 */
+ 10712 "10110111" // /* MW 2 */
+ 10713 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10715 "11110000" // /* MW 3 */
+ 10716 "11001110" // /* MW 2 */
+ 10717 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10719 "11110000" // /* MW 3 */
+ 10720 "11000110" // /* MW 2 */
+ 10721 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10723 "00010101" // /* MW 3 */
+ 10724 "11000101" // /* MW 2 */
+ 10725 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10727 "00100010" // /* MW 9 */
+ 10728 "11000110" // /* MW 8 */
+ 10729 "00000000" // /* MW 7 */
+ 10730 "00000000" // /* MW 6 */
+ 10731 "01011011" // /* MW 5 */
+ 10732 "00000001" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_256
+ 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */
+ 10737 "00000000" // /* MW 5 */
+ 10738 "00000000" // /* MW 4 */
+ 10739 "00010000" // /* MW 3 */
+ 10740 "00010101" // /* MW 2 */
+ 10741 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 194 29 first
+.delay_slot
+ 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10743 "01100100" // /* MW 3 */
+ 10744 "11100010" // /* MW 2 */
+ 10745 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+.delay_slot
+ 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10747 "00000001" // /* MW 3 */
+ 10748 "00000100" // /* MW 2 */
+ 10749 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10751 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10755 "00011100" // /* MW 13 */
+ 10756 "00000000" // /* MW 12 */
+ 10757 "00000000" // /* MW 11 */
+ 10758 "01010111" // /* MW 10 */
+ 10759 "00011010" // /* MW 9 */
+ 10760 "01000000" // /* MW 8 */
+ 10761 "00000000" // /* MW 7 */
+ 10762 "00000000" // /* MW 6 */
+ 10763 "10110110" // /* MW 5 */
+ 10764 "00000010" // /* MW 4 */
+ 10765 "11110000" // /* MW 3 */
+ 10766 "00101100" // /* MW 2 */
+ 10767 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_288
+.src_ref 10 "softfloat.c" 204 4
+.src_ref 10 "softfloat.c" 204 14
+ 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10769 "00000000" // /* MW 15 */
+ 10770 "00000000" // /* MW 14 */
+ 10771 "01111000" // /* MW 13 */
+ 10772 "10100101" // /* MW 12 */
+ 10773 "00000001" // /* MW 11 */
+ 10774 "00001000" // /* MW 10 */
+ 10775 "00000000" // /* MW 9 */
+ 10776 "00000001" // /* MW 8 */
+ 10777 "01011011" // /* MW 7 */
+ 10778 "00000001" // /* MW 6 */
+ 10779 "00100000" // /* MW 5 */
+ 10780 "00000000" // /* MW 4 */
+ 10781 "11110000" // /* MW 3 */
+ 10782 "00101100" // /* MW 2 */
+ 10783 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_304
+.src_ref 10 "softfloat.c" 202 18 first
+.src_ref 10 "softfloat.c" 202 36
+.src_ref 10 "softfloat.c" 203 30 first
+ 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10785 "10101000" // /* MW 9 */
+ 10786 "11001010" // /* MW 8 */
+ 10787 "10101000" // /* MW 7 */
+ 10788 "00110100" // /* MW 6 */
+ 10789 "00110000" // /* MW 5 */
+ 10790 "00100010" // /* MW 4 */
+ 10791 "00000000" // /* MW 3 */
+ 10792 "00100000" // /* MW 2 */
+ 10793 "11111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59
+.src_ref 10 "softfloat.c" 203 12
+.src_ref 10 "softfloat.c" 203 46
+ 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10795 "01011000" // /* MW 9 */
+ 10796 "11111111" // /* MW 8 */
+ 10797 "10001111" // /* MW 7 */
+ 10798 "00101100" // /* MW 6 */
+ 10799 "01100010" // /* MW 5 */
+ 10800 "00000110" // /* MW 4 */
+ 10801 "00000000" // /* MW 3 */
+ 10802 "11100011" // /* MW 2 */
+ 10803 "00000010" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 46
+ 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10805 "11010000" // /* MW 3 */
+ 10806 "10001100" // /* MW 2 */
+ 10807 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 202 36
+ 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00001101" // /* MW 3 */
+ 10810 "01000000" // /* MW 2 */
+ 10811 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 12
+ 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10813 "01000110" // /* MW 3 */
+ 10814 "10001000" // /* MW 2 */
+ 10815 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 205 4 first
+ 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10817 "00000000" // /* MW 3 */
+ 10818 "00101000" // /* MW 2 */
+ 10819 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 203 9 first
+.delay_slot
+ 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10821 "00000100" // /* MW 3 */
+ 10822 "00110110" // /* MW 2 */
+ 10823 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 204 4 first
+.src_ref 10 "softfloat.c" 204 14 first
+.delay_slot
+ 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10825 "00100010" // /* MW 3 */
+ 10826 "00000100" // /* MW 2 */
+ 10827 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 59 first
+.delay_slot
+ 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10829 "00111101" // /* MW 3 */
+ 10830 "10000100" // /* MW 2 */
+ 10831 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10833 "00100000" // /* MW 3 */
+ 10834 "01000100" // /* MW 2 */
+ 10835 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66
+.delay_slot
+ 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10837 "10000001" // /* MW 11 */
+ 10838 "10101101" // /* MW 10 */
+ 10839 "00000000" // /* MW 9 */
+ 10840 "00000100" // /* MW 8 */
+ 10841 "00000001" // /* MW 7 */
+ 10842 "00110110" // /* MW 6 */
+ 10843 "00100000" // /* MW 5 */
+ 10844 "00000000" // /* MW 4 */
+ 10845 "11110000" // /* MW 3 */
+ 10846 "00101100" // /* MW 2 */
+ 10847 "00000000" // /* MW 1 */
+.label TGT_F_ZL19roundAndPackFloat32iij_368
+.src_ref 10 "softfloat.c" 185 12 first
+ 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10849 "00000000" // /* MW 3 */
+ 10850 "00101000" // /* MW 2 */
+ 10851 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10853 "00000000" // /* MW 5 */
+ 10854 "00100000" // /* MW 4 */
+ 10855 "00000001" // /* MW 3 */
+ 10856 "10000000" // /* MW 2 */
+ 10857 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10859 "00100000" // /* MW 3 */
+ 10860 "01000110" // /* MW 2 */
+ 10861 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 68 first
+.delay_slot
+ 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10863 "11010000" // /* MW 3 */
+ 10864 "01000100" // /* MW 2 */
+ 10865 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 185 49
+.delay_slot
+ 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10867 "00100001" // /* MW 3 */
+ 10868 "11000000" // /* MW 2 */
+ 10869 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19roundAndPackFloat32iij__end
+ 10871 "00000000" // /* MW 1 */
+.label _ZL28normalizeRoundAndPackFloat32iij
+.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij
+.src_ref 10 "softfloat.c" 218 first
+.src_ref 10 "softfloat.c" 224 11 first
+.tail_call
+.function_start
+ 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10881 "00000000" // /* MW 5 */
+ 10882 "00000000" // /* MW 4 */
+ 10883 "01111000" // /* MW 3 */
+ 10884 "00010100" // /* MW 2 */
+ 10885 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 552 53 first
+.delay_slot
+ 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10887 "00110000" // /* MW 3 */
+ 10888 "11100000" // /* MW 2 */
+ 10889 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 223 45 first
+.delay_slot
+ 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10891 "11111111" // /* MW 3 */
+ 10892 "00100001" // /* MW 2 */
+ 10893 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 44 first
+.delay_slot
+ 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10895 "00000001" // /* MW 3 */
+ 10896 "10000101" // /* MW 2 */
+ 10897 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 224 62
+.delay_slot
+ 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10899 "00001101" // /* MW 3 */
+ 10900 "11000111" // /* MW 2 */
+ 10901 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL28normalizeRoundAndPackFloat32iij__end
+ 10903 "00000000" // /* MW 1 */
+.label int32_to_float32
+.function int32_to_float32 int32_to_float32
+.src_ref 10 "softfloat.c" 477 first
+.src_ref 10 "softfloat.c" 481 4
+.src_ref 10 "softfloat.c" 481 11 first
+.function_start
+ 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */
+ 10913 "00000001" // /* MW 5 */
+ 10914 "00000000" // /* MW 4 */
+ 10915 "01111000" // /* MW 3 */
+ 10916 "00010101" // /* MW 2 */
+ 10917 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10919 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10927 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11
+ 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10929 "00000000" // /* MW 5 */
+ 10930 "00100000" // /* MW 4 */
+ 10931 "00001000" // /* MW 3 */
+ 10932 "00000000" // /* MW 2 */
+ 10933 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 11 first
+ 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10935 "00000111" // /* MW 3 */
+ 10936 "01100001" // /* MW 2 */
+ 10937 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 482 4
+ 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */
+ 10939 "00000001" // /* MW 5 */
+ 10940 "01000000" // /* MW 4 */
+ 10941 "10000000" // /* MW 3 */
+ 10942 "00010101" // /* MW 2 */
+ 10943 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10949 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10951 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10953 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 11
+.src_ref 10 "softfloat.c" 484 11 first
+.tail_call
+ 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 10955 "00100000" // /* MW 9 */
+ 10956 "00000000" // /* MW 8 */
+ 10957 "00000000" // /* MW 7 */
+ 10958 "01010000" // /* MW 6 */
+ 10959 "00000101" // /* MW 5 */
+ 10960 "00000000" // /* MW 4 */
+ 10961 "00000000" // /* MW 3 */
+ 10962 "10000010" // /* MW 2 */
+ 10963 "00010011" // /* MW 1 */
+.src_ref 10 "softfloat.c" 484 60
+.src_ref 10 "softfloat.c" 484 62
+.delay_slot
+ 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00010000" // /* MW 3 */
+ 10966 "01000111" // /* MW 2 */
+ 10967 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16
+.delay_slot
+ 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10969 "00000001" // /* MW 3 */
+ 10970 "00100000" // /* MW 2 */
+ 10971 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 483 16 first
+.delay_slot
+ 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10973 "00001010" // /* MW 3 */
+ 10974 "01000011" // /* MW 2 */
+ 10975 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 10979 "00011100" // /* MW 13 */
+ 10980 "00000000" // /* MW 12 */
+ 10981 "00000000" // /* MW 11 */
+ 10982 "01010111" // /* MW 10 */
+ 10983 "00011010" // /* MW 9 */
+ 10984 "01000000" // /* MW 8 */
+ 10985 "00000000" // /* MW 7 */
+ 10986 "00000000" // /* MW 6 */
+ 10987 "10110110" // /* MW 5 */
+ 10988 "00000010" // /* MW 4 */
+ 10989 "11110000" // /* MW 3 */
+ 10990 "00101100" // /* MW 2 */
+ 10991 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_80
+.src_ref 10 "softfloat.c" 481 18 first
+.return_address
+ 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10993 "00000000" // /* MW 3 */
+ 10994 "00101000" // /* MW 2 */
+ 10995 "00010000" // /* MW 1 */
+.delay_slot
+ 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10997 "00000001" // /* MW 3 */
+ 10998 "00000000" // /* MW 2 */
+ 10999 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11001 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11003 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11005 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11007 "00000000" // /* MW 1 */
+.label TGT_Fint32_to_float32_96
+.src_ref 10 "softfloat.c" 482 37 first
+ 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11009 "00000000" // /* MW 3 */
+ 11010 "00101000" // /* MW 2 */
+ 11011 "00010000" // /* MW 1 */
+.delay_slot
+ 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11013 "00000000" // /* MW 5 */
+ 11014 "00100000" // /* MW 4 */
+ 11015 "00000000" // /* MW 3 */
+ 11016 "00000000" // /* MW 2 */
+ 11017 "11001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11019 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label int32_to_float32__end
+ 11025 "00000000" // /* MW 1 */
+.label _ZL14addFloat32Sigsjji
+.function addFloat32Sigs _ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 734 first
+.function_start
+ 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11041 "10010000" // /* MW 9 */
+ 11042 "11111111" // /* MW 8 */
+ 11043 "00001111" // /* MW 7 */
+ 11044 "11111110" // /* MW 6 */
+ 11045 "00011111" // /* MW 5 */
+ 11046 "00000000" // /* MW 4 */
+ 11047 "00000000" // /* MW 3 */
+ 11048 "00110010" // /* MW 2 */
+ 11049 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11051 "00101101" // /* MW 3 */
+ 11052 "01100011" // /* MW 2 */
+ 11053 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11055 "00101101" // /* MW 3 */
+ 11056 "10001001" // /* MW 2 */
+ 11057 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11059 "10010000" // /* MW 3 */
+ 11060 "01110110" // /* MW 2 */
+ 11061 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11063 "10010000" // /* MW 3 */
+ 11064 "00110010" // /* MW 2 */
+ 11065 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 744 19 first
+.src_ref 10 "softfloat.c" 747 11
+.src_ref 10 "softfloat.c" 761 22
+.src_ref 10 "softfloat.c" 772 35
+.src_ref 10 "softfloat.c" 788 24
+ 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11067 "00000001" // /* MW 5 */
+ 11068 "00100000" // /* MW 4 */
+ 11069 "00111100" // /* MW 3 */
+ 11070 "01110010" // /* MW 2 */
+ 11071 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 11 first
+ 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00011010" // /* MW 3 */
+ 11074 "00001001" // /* MW 2 */
+ 11075 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 747 4
+ 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */
+ 11077 "00000001" // /* MW 5 */
+ 11078 "01000000" // /* MW 4 */
+ 11079 "11111000" // /* MW 3 */
+ 11080 "00010101" // /* MW 2 */
+ 11081 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.delay_slot
+ 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11083 "00000100" // /* MW 3 */
+ 11084 "01100111" // /* MW 2 */
+ 11085 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 745 9
+.src_ref 10 "softfloat.c" 746 9
+.delay_slot
+ 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11087 "00011001" // /* MW 5 */
+ 11088 "00100000" // /* MW 4 */
+ 11089 "10010000" // /* MW 3 */
+ 11090 "00100000" // /* MW 2 */
+ 11091 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 745 9 first
+.delay_slot
+ 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "00001101" // /* MW 3 */
+ 11094 "11100110" // /* MW 2 */
+ 11095 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 746 9 first
+.src_ref 10 "softfloat.c" 748 18
+.src_ref 10 "softfloat.c" 762 18
+.delay_slot
+ 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11097 "11111101" // /* MW 5 */
+ 11098 "00100011" // /* MW 4 */
+ 11099 "10111010" // /* MW 3 */
+ 11100 "00000001" // /* MW 2 */
+ 11101 "10000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+.src_ref 10 "softfloat.c" 128 31
+.src_ref 10 "softfloat.c" 748 18 first
+.delay_slot
+ 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11103 "01111101" // /* MW 5 */
+ 11104 "00100000" // /* MW 4 */
+ 11105 "11111001" // /* MW 3 */
+ 11106 "00101000" // /* MW 2 */
+ 11107 "11011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 22 first
+ 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11109 "10001001" // /* MW 3 */
+ 11110 "01001011" // /* MW 2 */
+ 11111 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 761 9
+ 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */
+ 11113 "00000001" // /* MW 5 */
+ 11114 "01000000" // /* MW 4 */
+ 11115 "01011000" // /* MW 3 */
+ 11116 "00010110" // /* MW 2 */
+ 11117 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "00101101" // /* MW 3 */
+ 11120 "11001001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11125 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11127 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11129 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 18 first
+ 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11131 "01000111" // /* MW 3 */
+ 11132 "01101001" // /* MW 2 */
+ 11133 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 762 8
+ 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */
+ 11135 "00000001" // /* MW 5 */
+ 11136 "01000000" // /* MW 4 */
+ 11137 "01000000" // /* MW 3 */
+ 11138 "00010110" // /* MW 2 */
+ 11139 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11141 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11143 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11145 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11147 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11149 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11151 "10100000" // /* MW 3 */
+ 11152 "01010001" // /* MW 2 */
+ 11153 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 787 4
+ 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11155 "10100000" // /* MW 3 */
+ 11156 "10011100" // /* MW 2 */
+ 11157 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 767 12 first
+ 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11159 "00000111" // /* MW 3 */
+ 11160 "01000000" // /* MW 2 */
+ 11161 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+ 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11163 "00010010" // /* MW 3 */
+ 11164 "00100011" // /* MW 2 */
+ 11165 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 772 35 first
+ 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11167 "00010001" // /* MW 3 */
+ 11168 "00100011" // /* MW 2 */
+ 11169 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11171 "00000001" // /* MW 5 */
+ 11172 "00000000" // /* MW 4 */
+ 11173 "00101000" // /* MW 3 */
+ 11174 "00010110" // /* MW 2 */
+ 11175 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11177 "00000000" // /* MW 5 */
+ 11178 "00100000" // /* MW 4 */
+ 11179 "00001010" // /* MW 3 */
+ 11180 "00000000" // /* MW 2 */
+ 11181 "00100000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 770 17 first
+.delay_slot
+ 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11183 "01000101" // /* MW 3 */
+ 11184 "11000111" // /* MW 2 */
+ 11185 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 766 8 first
+.delay_slot
+ 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11187 "00110010" // /* MW 3 */
+ 11188 "11100110" // /* MW 2 */
+ 11189 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11193 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11195 "10000001" // /* MW 5 */
+ 11196 "00100000" // /* MW 4 */
+ 11197 "00110000" // /* MW 3 */
+ 11198 "11100010" // /* MW 2 */
+ 11199 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11201 "00100100" // /* MW 3 */
+ 11202 "11100101" // /* MW 2 */
+ 11203 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11205 "00101101" // /* MW 3 */
+ 11206 "11100101" // /* MW 2 */
+ 11207 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11209 "00001010" // /* MW 3 */
+ 11210 "01110110" // /* MW 2 */
+ 11211 "00010100" // /* MW 1 */
+ 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */
+ 11213 "00000000" // /* MW 5 */
+ 11214 "00000000" // /* MW 4 */
+ 11215 "00101000" // /* MW 3 */
+ 11216 "00010110" // /* MW 2 */
+ 11217 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+.delay_slot
+ 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11219 "00111101" // /* MW 3 */
+ 11220 "11000110" // /* MW 2 */
+ 11221 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+.delay_slot
+ 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11223 "11110000" // /* MW 3 */
+ 11224 "10100100" // /* MW 2 */
+ 11225 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+.delay_slot
+ 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11227 "11110000" // /* MW 3 */
+ 11228 "11100010" // /* MW 2 */
+ 11229 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+.delay_slot
+ 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11231 "00100101" // /* MW 3 */
+ 11232 "11100101" // /* MW 2 */
+ 11233 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+.delay_slot
+ 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11235 "01100000" // /* MW 13 */
+ 11236 "00101011" // /* MW 12 */
+ 11237 "00000000" // /* MW 11 */
+ 11238 "10101111" // /* MW 10 */
+ 11239 "00110100" // /* MW 9 */
+ 11240 "00000000" // /* MW 8 */
+ 11241 "00100010" // /* MW 7 */
+ 11242 "01100111" // /* MW 6 */
+ 11243 "00100100" // /* MW 5 */
+ 11244 "00000000" // /* MW 4 */
+ 11245 "11110000" // /* MW 3 */
+ 11246 "00101100" // /* MW 2 */
+ 11247 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_208
+.src_ref 10 "softfloat.c" 748 8 first
+ 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */
+ 11249 "00000001" // /* MW 5 */
+ 11250 "01000000" // /* MW 4 */
+ 11251 "01111000" // /* MW 3 */
+ 11252 "00010110" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17
+.src_ref 10 "softfloat.c" 785 9
+.delay_slot
+ 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11255 "00000000" // /* MW 5 */
+ 11256 "00100000" // /* MW 4 */
+ 11257 "00001010" // /* MW 3 */
+ 11258 "00000000" // /* MW 2 */
+ 11259 "00100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11263 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11265 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11267 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11269 "10100000" // /* MW 3 */
+ 11270 "01010001" // /* MW 2 */
+ 11271 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 753 12 first
+.src_ref 10 "softfloat.c" 787 4
+ 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11273 "01000001" // /* MW 5 */
+ 11274 "00111011" // /* MW 4 */
+ 11275 "11100001" // /* MW 3 */
+ 11276 "11111111" // /* MW 2 */
+ 11277 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8
+.src_ref 10 "softfloat.c" 752 18
+ 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "10100000" // /* MW 3 */
+ 11280 "11011100" // /* MW 2 */
+ 11281 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+ 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010010" // /* MW 3 */
+ 11284 "11100011" // /* MW 2 */
+ 11285 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */
+ 11287 "00000001" // /* MW 5 */
+ 11288 "00000000" // /* MW 4 */
+ 11289 "00101000" // /* MW 3 */
+ 11290 "00010110" // /* MW 2 */
+ 11291 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 756 17 first
+.delay_slot
+ 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11293 "00000101" // /* MW 3 */
+ 11294 "00000001" // /* MW 2 */
+ 11295 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 752 8 first
+.src_ref 10 "softfloat.c" 752 18 first
+.delay_slot
+ 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00000010" // /* MW 3 */
+ 11298 "00100000" // /* MW 2 */
+ 11299 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11301 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11303 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11305 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+ 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11307 "10000001" // /* MW 5 */
+ 11308 "00100000" // /* MW 4 */
+ 11309 "00110000" // /* MW 3 */
+ 11310 "11100010" // /* MW 2 */
+ 11311 "11000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11313 "00100100" // /* MW 3 */
+ 11314 "11100101" // /* MW 2 */
+ 11315 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11317 "00101101" // /* MW 3 */
+ 11318 "00100101" // /* MW 2 */
+ 11319 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11321 "00111101" // /* MW 3 */
+ 11322 "00000110" // /* MW 2 */
+ 11323 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11325 "00001010" // /* MW 3 */
+ 11326 "01110110" // /* MW 2 */
+ 11327 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11329 "11110000" // /* MW 3 */
+ 11330 "10100100" // /* MW 2 */
+ 11331 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11333 "11110000" // /* MW 3 */
+ 11334 "00100000" // /* MW 2 */
+ 11335 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11337 "00100101" // /* MW 3 */
+ 11338 "11100011" // /* MW 2 */
+ 11339 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11341 "00010010" // /* MW 3 */
+ 11342 "00100001" // /* MW 2 */
+ 11343 "00010100" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_304
+.src_ref 10 "softfloat.c" 785 9 first
+.src_ref 10 "softfloat.c" 786 26
+.src_ref 10 "softfloat.c" 787 4 first
+ 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11345 "11001000" // /* MW 9 */
+ 11346 "10111111" // /* MW 8 */
+ 11347 "00101000" // /* MW 7 */
+ 11348 "00101110" // /* MW 6 */
+ 11349 "00111010" // /* MW 5 */
+ 11350 "00100111" // /* MW 4 */
+ 11351 "00000000" // /* MW 3 */
+ 11352 "00110010" // /* MW 2 */
+ 11353 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 18 first
+.src_ref 10 "softfloat.c" 790 8 first
+ 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11355 "00000001" // /* MW 5 */
+ 11356 "00110001" // /* MW 4 */
+ 11357 "00011000" // /* MW 3 */
+ 11358 "11100000" // /* MW 2 */
+ 11359 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 786 26
+ 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11361 "00101101" // /* MW 3 */
+ 11362 "11100101" // /* MW 2 */
+ 11363 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 24 first
+ 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11365 "10001010" // /* MW 3 */
+ 11366 "10110111" // /* MW 2 */
+ 11367 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11369 "00000010" // /* MW 3 */
+ 11370 "01000101" // /* MW 2 */
+ 11371 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 788 4
+ 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11373 "00110010" // /* MW 3 */
+ 11374 "10000111" // /* MW 2 */
+ 11375 "00010100" // /* MW 1 */
+.label __ll1__ZL14addFloat32Sigsjji
+.src_ref 10 "softfloat.c" 793 11 first
+.tail_call
+ 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 11377 "00000000" // /* MW 5 */
+ 11378 "00000000" // /* MW 4 */
+ 11379 "01111000" // /* MW 3 */
+ 11380 "00010100" // /* MW 2 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11385 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11387 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11391 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_352
+.src_ref 10 "softfloat.c" 763 12 first
+.return_address
+ 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */
+ 11393 "00000001" // /* MW 5 */
+ 11394 "01000000" // /* MW 4 */
+ 11395 "10001000" // /* MW 3 */
+ 11396 "00010110" // /* MW 2 */
+ 11397 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11407 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 764 12 first
+ 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11409 "00000000" // /* MW 3 */
+ 11410 "00101000" // /* MW 2 */
+ 11411 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11413 "00000000" // /* MW 5 */
+ 11414 "00100000" // /* MW 4 */
+ 11415 "00001000" // /* MW 3 */
+ 11416 "10000000" // /* MW 2 */
+ 11417 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38 first
+.delay_slot
+ 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11419 "00000000" // /* MW 3 */
+ 11420 "00000001" // /* MW 2 */
+ 11421 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11423 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11427 "00011100" // /* MW 13 */
+ 11428 "00000000" // /* MW 12 */
+ 11429 "00000000" // /* MW 11 */
+ 11430 "01010111" // /* MW 10 */
+ 11431 "00011010" // /* MW 9 */
+ 11432 "01000000" // /* MW 8 */
+ 11433 "00000000" // /* MW 7 */
+ 11434 "00000000" // /* MW 6 */
+ 11435 "10110110" // /* MW 5 */
+ 11436 "00000010" // /* MW 4 */
+ 11437 "11110000" // /* MW 3 */
+ 11438 "00101100" // /* MW 2 */
+ 11439 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 776 8 first
+ 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */
+ 11441 "00000001" // /* MW 5 */
+ 11442 "01000000" // /* MW 4 */
+ 11443 "10010000" // /* MW 3 */
+ 11444 "00010110" // /* MW 2 */
+ 11445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11455 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 8 first
+ 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */
+ 11457 "00000001" // /* MW 5 */
+ 11458 "00000000" // /* MW 4 */
+ 11459 "10101000" // /* MW 3 */
+ 11460 "00010110" // /* MW 2 */
+ 11461 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11471 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 793 11
+ 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11473 "10100000" // /* MW 3 */
+ 11474 "01010001" // /* MW 2 */
+ 11475 "00011000" // /* MW 1 */
+ 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */
+ 11477 "00000000" // /* MW 5 */
+ 11478 "00000000" // /* MW 4 */
+ 11479 "00111000" // /* MW 3 */
+ 11480 "00010110" // /* MW 2 */
+ 11481 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26
+.delay_slot
+ 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11483 "00000000" // /* MW 5 */
+ 11484 "10100000" // /* MW 4 */
+ 11485 "00001000" // /* MW 3 */
+ 11486 "00000000" // /* MW 2 */
+ 11487 "01000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 26 first
+.src_ref 10 "softfloat.c" 793 11
+.delay_slot
+ 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11489 "01000001" // /* MW 5 */
+ 11490 "00111011" // /* MW 4 */
+ 11491 "00010001" // /* MW 3 */
+ 11492 "01100010" // /* MW 2 */
+ 11493 "10011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 781 33
+.delay_slot
+ 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11495 "00000000" // /* MW 3 */
+ 11496 "01000111" // /* MW 2 */
+ 11497 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11501 "01100111" // /* MW 3 */
+ 11502 "00000001" // /* MW 2 */
+ 11503 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_464
+.src_ref 10 "softfloat.c" 749 12 first
+ 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */
+ 11505 "00000001" // /* MW 5 */
+ 11506 "01000000" // /* MW 4 */
+ 11507 "10111000" // /* MW 3 */
+ 11508 "00010110" // /* MW 2 */
+ 11509 "10011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11519 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 750 12 first
+ 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11521 "00000000" // /* MW 3 */
+ 11522 "00101000" // /* MW 2 */
+ 11523 "00010000" // /* MW 1 */
+.delay_slot
+ 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11525 "10100000" // /* MW 3 */
+ 11526 "00010000" // /* MW 2 */
+ 11527 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11535 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_496
+.src_ref 10 "softfloat.c" 763 31 first
+.tail_call
+ 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11537 "00000000" // /* MW 5 */
+ 11538 "00000000" // /* MW 4 */
+ 11539 "01000000" // /* MW 3 */
+ 11540 "00010100" // /* MW 2 */
+ 11541 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11543 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11551 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 777 22 first
+.return_address
+ 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11553 "00000101" // /* MW 3 */
+ 11554 "11100001" // /* MW 2 */
+ 11555 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 777 12
+ 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */
+ 11557 "00000001" // /* MW 5 */
+ 11558 "01000000" // /* MW 4 */
+ 11559 "11000000" // /* MW 3 */
+ 11560 "00010110" // /* MW 2 */
+ 11561 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11563 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11565 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11567 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11571 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 778 12 first
+ 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11573 "00000000" // /* MW 3 */
+ 11574 "00101000" // /* MW 2 */
+ 11575 "00010000" // /* MW 1 */
+.delay_slot
+ 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11577 "10100000" // /* MW 3 */
+ 11578 "00010000" // /* MW 2 */
+ 11579 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11583 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11585 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11587 "00011100" // /* MW 13 */
+ 11588 "00000000" // /* MW 12 */
+ 11589 "00000000" // /* MW 11 */
+ 11590 "01010111" // /* MW 10 */
+ 11591 "00011010" // /* MW 9 */
+ 11592 "01000000" // /* MW 8 */
+ 11593 "00000000" // /* MW 7 */
+ 11594 "00000000" // /* MW 6 */
+ 11595 "10110110" // /* MW 5 */
+ 11596 "00000010" // /* MW 4 */
+ 11597 "11110000" // /* MW 3 */
+ 11598 "00101100" // /* MW 2 */
+ 11599 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 780 25 first
+.src_ref 10 "softfloat.c" 780 62 first
+ 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11601 "10000010" // /* MW 5 */
+ 11602 "00110011" // /* MW 4 */
+ 11603 "00001000" // /* MW 3 */
+ 11604 "00000000" // /* MW 2 */
+ 11605 "00000101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11607 "11101001" // /* MW 3 */
+ 11608 "11100010" // /* MW 2 */
+ 11609 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 780 70
+.delay_slot
+ 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11611 "00011101" // /* MW 3 */
+ 11612 "00100001" // /* MW 2 */
+ 11613 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 66 first
+.delay_slot
+ 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11615 "00000000" // /* MW 3 */
+ 11616 "00000001" // /* MW 2 */
+ 11617 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11619 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 11621 "10000001" // /* MW 11 */
+ 11622 "10101101" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "00000000" // /* MW 7 */
+ 11626 "00000000" // /* MW 6 */
+ 11627 "00100000" // /* MW 5 */
+ 11628 "00000000" // /* MW 4 */
+ 11629 "11110000" // /* MW 3 */
+ 11630 "00101100" // /* MW 2 */
+ 11631 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 749 31 first
+.tail_call
+ 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11633 "00000000" // /* MW 5 */
+ 11634 "00000000" // /* MW 4 */
+ 11635 "01000000" // /* MW 3 */
+ 11636 "00010100" // /* MW 2 */
+ 11637 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11639 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11647 "00000000" // /* MW 1 */
+.label TGT_F_ZL14addFloat32Sigsjji_608
+.src_ref 10 "softfloat.c" 777 38 first
+.tail_call
+.return_address
+ 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 11649 "00000000" // /* MW 5 */
+ 11650 "00000000" // /* MW 4 */
+ 11651 "01000000" // /* MW 3 */
+ 11652 "00010100" // /* MW 2 */
+ 11653 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11655 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11657 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11659 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11661 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14addFloat32Sigsjji__end
+ 11663 "00000000" // /* MW 1 */
+.label _ZL14subFloat32Sigsjji
+.function subFloat32Sigs _ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 70 13
+.src_ref 10 "softfloat.c" 81 14
+.src_ref 10 "softfloat.c" 805 first
+.function_start
+ 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11665 "10010000" // /* MW 9 */
+ 11666 "11111111" // /* MW 8 */
+ 11667 "00001111" // /* MW 7 */
+ 11668 "11111110" // /* MW 6 */
+ 11669 "00011111" // /* MW 5 */
+ 11670 "00000000" // /* MW 4 */
+ 11671 "00000000" // /* MW 3 */
+ 11672 "00110001" // /* MW 2 */
+ 11673 "11111101" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14 first
+ 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11675 "00011101" // /* MW 3 */
+ 11676 "10001001" // /* MW 2 */
+ 11677 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 14
+ 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11679 "00011101" // /* MW 3 */
+ 11680 "01100101" // /* MW 2 */
+ 11681 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+ 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11683 "00000100" // /* MW 3 */
+ 11684 "01101001" // /* MW 2 */
+ 11685 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21 first
+ 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11687 "10010000" // /* MW 3 */
+ 11688 "00110010" // /* MW 2 */
+ 11689 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 81 21
+ 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11691 "10010000" // /* MW 3 */
+ 11692 "10110110" // /* MW 2 */
+ 11693 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 70 13 first
+.src_ref 10 "softfloat.c" 816 9
+.src_ref 10 "softfloat.c" 817 9
+ 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11695 "00011101" // /* MW 5 */
+ 11696 "10100000" // /* MW 4 */
+ 11697 "10011001" // /* MW 3 */
+ 11698 "00100000" // /* MW 2 */
+ 11699 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 816 9 first
+ 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11701 "00111101" // /* MW 3 */
+ 11702 "00100011" // /* MW 2 */
+ 11703 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 38
+.src_ref 10 "softfloat.c" 815 19 first
+.src_ref 10 "softfloat.c" 818 11
+.src_ref 10 "softfloat.c" 819 17
+.src_ref 10 "softfloat.c" 843 31
+ 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11705 "00000001" // /* MW 5 */
+ 11706 "00100000" // /* MW 4 */
+ 11707 "00111100" // /* MW 3 */
+ 11708 "10110010" // /* MW 2 */
+ 11709 "11011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 11 first
+ 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11711 "00101010" // /* MW 3 */
+ 11712 "00001011" // /* MW 2 */
+ 11713 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 818 4
+ 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */
+ 11715 "00000001" // /* MW 5 */
+ 11716 "01000000" // /* MW 4 */
+ 11717 "01000000" // /* MW 3 */
+ 11718 "00010111" // /* MW 2 */
+ 11719 "00101000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 817 9 first
+.delay_slot
+ 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11721 "00111101" // /* MW 3 */
+ 11722 "00100001" // /* MW 2 */
+ 11723 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14
+.src_ref 10 "softfloat.c" 851 14
+.src_ref 10 "softfloat.c" 859 13
+.src_ref 10 "softfloat.c" 862 9
+.delay_slot
+ 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11725 "00010000" // /* MW 9 */
+ 11726 "00000000" // /* MW 8 */
+ 11727 "10001000" // /* MW 7 */
+ 11728 "00000000" // /* MW 6 */
+ 11729 "00000000" // /* MW 5 */
+ 11730 "00010000" // /* MW 4 */
+ 11731 "00000000" // /* MW 3 */
+ 11732 "11100000" // /* MW 2 */
+ 11733 "00011111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 851 14 first
+.delay_slot
+ 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11735 "00000111" // /* MW 3 */
+ 11736 "11101000" // /* MW 2 */
+ 11737 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 862 9 first
+.delay_slot
+ 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11739 "01000101" // /* MW 3 */
+ 11740 "01100110" // /* MW 2 */
+ 11741 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 859 13 first
+.delay_slot
+ 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11743 "00000101" // /* MW 3 */
+ 11744 "00001001" // /* MW 2 */
+ 11745 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 17 first
+ 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11747 "10001001" // /* MW 3 */
+ 11748 "10001101" // /* MW 2 */
+ 11749 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 819 4
+ 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */
+ 11751 "00000001" // /* MW 5 */
+ 11752 "01000000" // /* MW 4 */
+ 11753 "10010000" // /* MW 3 */
+ 11754 "00010111" // /* MW 2 */
+ 11755 "00110000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.src_ref 10 "softfloat.c" 835 34
+.delay_slot
+ 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11757 "00000101" // /* MW 3 */
+ 11758 "00001010" // /* MW 2 */
+ 11759 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 34 first
+.delay_slot
+ 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11761 "01010110" // /* MW 3 */
+ 11762 "11001110" // /* MW 2 */
+ 11763 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11765 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11767 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11769 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 14 first
+ 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11771 "00000111" // /* MW 3 */
+ 11772 "01101000" // /* MW 2 */
+ 11773 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 833 4
+ 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */
+ 11775 "00000001" // /* MW 5 */
+ 11776 "01000000" // /* MW 4 */
+ 11777 "11001000" // /* MW 3 */
+ 11778 "00010111" // /* MW 2 */
+ 11779 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11781 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11791 "10100000" // /* MW 3 */
+ 11792 "01010011" // /* MW 2 */
+ 11793 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 838 8 first
+ 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11795 "00000111" // /* MW 3 */
+ 11796 "10100000" // /* MW 2 */
+ 11797 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+ 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11799 "00100010" // /* MW 3 */
+ 11800 "00100001" // /* MW 2 */
+ 11801 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 843 31 first
+ 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11803 "00000001" // /* MW 3 */
+ 11804 "00100001" // /* MW 2 */
+ 11805 "00010110" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */
+ 11807 "00000001" // /* MW 5 */
+ 11808 "00000000" // /* MW 4 */
+ 11809 "00110000" // /* MW 3 */
+ 11810 "00010111" // /* MW 2 */
+ 11811 "10000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 837 4 first
+.delay_slot
+ 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11813 "00110010" // /* MW 3 */
+ 11814 "01100011" // /* MW 2 */
+ 11815 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11817 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11819 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11821 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11823 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11825 "01011000" // /* MW 9 */
+ 11826 "00011111" // /* MW 8 */
+ 11827 "01001000" // /* MW 7 */
+ 11828 "00001110" // /* MW 6 */
+ 11829 "00111000" // /* MW 5 */
+ 11830 "00110000" // /* MW 4 */
+ 11831 "00000000" // /* MW 3 */
+ 11832 "00010100" // /* MW 2 */
+ 11833 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11835 "00100100" // /* MW 3 */
+ 11836 "11100101" // /* MW 2 */
+ 11837 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11839 "00101101" // /* MW 3 */
+ 11840 "01100101" // /* MW 2 */
+ 11841 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11843 "11110000" // /* MW 3 */
+ 11844 "01100110" // /* MW 2 */
+ 11845 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11847 "01001010" // /* MW 3 */
+ 11848 "00110111" // /* MW 2 */
+ 11849 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15 first
+ 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11851 "00111101" // /* MW 3 */
+ 11852 "01100010" // /* MW 2 */
+ 11853 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57
+ 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11855 "11110000" // /* MW 3 */
+ 11856 "10100100" // /* MW 2 */
+ 11857 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25
+ 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11859 "00100101" // /* MW 3 */
+ 11860 "01100001" // /* MW 2 */
+ 11861 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11863 "00000010" // /* MW 9 */
+ 11864 "11100011" // /* MW 8 */
+ 11865 "00000100" // /* MW 7 */
+ 11866 "00000000" // /* MW 6 */
+ 11867 "01011011" // /* MW 5 */
+ 11868 "00000001" // /* MW 4 */
+ 11869 "11110000" // /* MW 3 */
+ 11870 "00101100" // /* MW 2 */
+ 11871 "00000000" // /* MW 1 */
+.label __ll2__ZL14subFloat32Sigsjji
+ 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */
+ 11873 "00000000" // /* MW 5 */
+ 11874 "00000000" // /* MW 4 */
+ 11875 "10000000" // /* MW 3 */
+ 11876 "00010111" // /* MW 2 */
+ 11877 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16 first
+.delay_slot
+ 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11879 "00010001" // /* MW 3 */
+ 11880 "00000111" // /* MW 2 */
+ 11881 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11883 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11885 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11887 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11889 "00000000" // /* MW 15 */
+ 11890 "00000000" // /* MW 14 */
+ 11891 "01111000" // /* MW 13 */
+ 11892 "10100101" // /* MW 12 */
+ 11893 "00000001" // /* MW 11 */
+ 11894 "00000000" // /* MW 10 */
+ 11895 "00000000" // /* MW 9 */
+ 11896 "00000000" // /* MW 8 */
+ 11897 "01011011" // /* MW 7 */
+ 11898 "00000001" // /* MW 6 */
+ 11899 "00100000" // /* MW 5 */
+ 11900 "00000000" // /* MW 4 */
+ 11901 "11110000" // /* MW 3 */
+ 11902 "00101100" // /* MW 2 */
+ 11903 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_240
+.src_ref 10 "softfloat.c" 851 4 first
+ 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */
+ 11905 "00000001" // /* MW 5 */
+ 11906 "01000000" // /* MW 4 */
+ 11907 "11100000" // /* MW 3 */
+ 11908 "00010111" // /* MW 2 */
+ 11909 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11911 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11913 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11915 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11917 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "10100000" // /* MW 3 */
+ 11922 "00011101" // /* MW 2 */
+ 11923 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+ 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "10100000" // /* MW 3 */
+ 11926 "01010001" // /* MW 2 */
+ 11927 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4
+.src_ref 10 "softfloat.c" 855 14
+ 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "10100000" // /* MW 3 */
+ 11930 "11011100" // /* MW 2 */
+ 11931 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+ 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00100000" // /* MW 3 */
+ 11934 "01010000" // /* MW 2 */
+ 11935 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 856 8 first
+ 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11937 "11111111" // /* MW 3 */
+ 11938 "10100011" // /* MW 2 */
+ 11939 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+ 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11941 "00100010" // /* MW 3 */
+ 11942 "01100011" // /* MW 2 */
+ 11943 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 46 4 first
+.src_ref 10 "softfloat-macros" 46 15 first
+ 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */
+ 11945 "00000001" // /* MW 5 */
+ 11946 "00000000" // /* MW 4 */
+ 11947 "01111000" // /* MW 3 */
+ 11948 "00010111" // /* MW 2 */
+ 11949 "10001000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 855 4 first
+.src_ref 10 "softfloat.c" 855 14 first
+.delay_slot
+ 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11951 "01000010" // /* MW 3 */
+ 11952 "00100000" // /* MW 2 */
+ 11953 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11955 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11957 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11959 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11961 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20
+.src_ref 10 "softfloat-macros" 50 38 first
+.src_ref 10 "softfloat-macros" 50 48
+ 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11963 "01011000" // /* MW 9 */
+ 11964 "00011111" // /* MW 8 */
+ 11965 "10001000" // /* MW 7 */
+ 11966 "10001110" // /* MW 6 */
+ 11967 "00101000" // /* MW 5 */
+ 11968 "00110001" // /* MW 4 */
+ 11969 "00000000" // /* MW 3 */
+ 11970 "00000011" // /* MW 2 */
+ 11971 "00000100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 48
+ 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11973 "01000100" // /* MW 3 */
+ 11974 "10101001" // /* MW 2 */
+ 11975 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 32
+ 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11977 "01001101" // /* MW 3 */
+ 11978 "00101001" // /* MW 2 */
+ 11979 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 15
+ 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11981 "00101101" // /* MW 3 */
+ 11982 "00100101" // /* MW 2 */
+ 11983 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 20 first
+ 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11985 "00111010" // /* MW 3 */
+ 11986 "01110110" // /* MW 2 */
+ 11987 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 57 first
+ 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11989 "11110000" // /* MW 3 */
+ 11990 "00101000" // /* MW 2 */
+ 11991 "00010101" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 53 16 first
+ 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11993 "11110000" // /* MW 3 */
+ 11994 "00100000" // /* MW 2 */
+ 11995 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 50 25 first
+ 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11997 "01000101" // /* MW 3 */
+ 11998 "10100011" // /* MW 2 */
+ 11999 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat-macros" 49 9 first
+ 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12001 "00000000" // /* MW 15 */
+ 12002 "00000000" // /* MW 14 */
+ 12003 "01111000" // /* MW 13 */
+ 12004 "10100101" // /* MW 12 */
+ 12005 "00000001" // /* MW 11 */
+ 12006 "10010000" // /* MW 10 */
+ 12007 "00001000" // /* MW 9 */
+ 12008 "00100001" // /* MW 8 */
+ 12009 "01011011" // /* MW 7 */
+ 12010 "00000001" // /* MW 6 */
+ 12011 "00100000" // /* MW 5 */
+ 12012 "00000000" // /* MW 4 */
+ 12013 "11110000" // /* MW 3 */
+ 12014 "00101100" // /* MW 2 */
+ 12015 "00000000" // /* MW 1 */
+.label __ll1__ZL14subFloat32Sigsjji
+.src_ref 10 "softfloat.c" 864 16 first
+ 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12017 "00000000" // /* MW 15 */
+ 12018 "00000000" // /* MW 14 */
+ 12019 "01111000" // /* MW 13 */
+ 12020 "10100101" // /* MW 12 */
+ 12021 "00000001" // /* MW 11 */
+ 12022 "00001100" // /* MW 10 */
+ 12023 "00111000" // /* MW 9 */
+ 12024 "00100110" // /* MW 8 */
+ 12025 "01011011" // /* MW 7 */
+ 12026 "00000001" // /* MW 6 */
+ 12027 "00100000" // /* MW 5 */
+ 12028 "00000000" // /* MW 4 */
+ 12029 "11110000" // /* MW 3 */
+ 12030 "00101100" // /* MW 2 */
+ 12031 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_368
+.src_ref 10 "softfloat.c" 868 11 first
+.tail_call
+ 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */
+ 12033 "00000000" // /* MW 5 */
+ 12034 "00000000" // /* MW 4 */
+ 12035 "01000000" // /* MW 3 */
+ 12036 "00010101" // /* MW 2 */
+ 12037 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4 first
+.delay_slot
+ 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "11111111" // /* MW 3 */
+ 12040 "01000101" // /* MW 2 */
+ 12041 "00010110" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12049 "00000000" // /* MW 15 */
+ 12050 "00000000" // /* MW 14 */
+ 12051 "01111000" // /* MW 13 */
+ 12052 "10100101" // /* MW 12 */
+ 12053 "00000001" // /* MW 11 */
+ 12054 "00000000" // /* MW 10 */
+ 12055 "00000000" // /* MW 9 */
+ 12056 "00000000" // /* MW 8 */
+ 12057 "01011011" // /* MW 7 */
+ 12058 "00000001" // /* MW 6 */
+ 12059 "00100000" // /* MW 5 */
+ 12060 "00000000" // /* MW 4 */
+ 12061 "11110000" // /* MW 3 */
+ 12062 "00101100" // /* MW 2 */
+ 12063 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_400
+.src_ref 10 "softfloat.c" 820 4 first
+.return_address
+ 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */
+ 12065 "00000001" // /* MW 5 */
+ 12066 "01000000" // /* MW 4 */
+ 12067 "11110000" // /* MW 3 */
+ 12068 "00010111" // /* MW 2 */
+ 12069 "10100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12079 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 14 first
+ 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12081 "00011100" // /* MW 3 */
+ 12082 "00100111" // /* MW 2 */
+ 12083 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 829 4
+ 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12085 "00000001" // /* MW 5 */
+ 12086 "01000000" // /* MW 4 */
+ 12087 "00001000" // /* MW 3 */
+ 12088 "00011000" // /* MW 2 */
+ 12089 "10011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4 first
+.delay_slot
+ 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12091 "10010010" // /* MW 3 */
+ 12092 "01110001" // /* MW 2 */
+ 12093 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12095 "10100000" // /* MW 3 */
+ 12096 "10011101" // /* MW 2 */
+ 12097 "00011100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 825 4
+.delay_slot
+ 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12099 "00100010" // /* MW 3 */
+ 12100 "01110011" // /* MW 2 */
+ 12101 "00010001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12103 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12105 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 14 first
+ 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00001100" // /* MW 3 */
+ 12108 "01100101" // /* MW 2 */
+ 12109 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 830 4
+ 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */
+ 12111 "00000001" // /* MW 5 */
+ 12112 "01000000" // /* MW 4 */
+ 12113 "00011000" // /* MW 3 */
+ 12114 "00011000" // /* MW 2 */
+ 12115 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12119 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12123 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12125 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12127 "01111101" // /* MW 3 */
+ 12128 "00100000" // /* MW 2 */
+ 12129 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24
+ 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12131 "11101000" // /* MW 5 */
+ 12132 "11001001" // /* MW 4 */
+ 12133 "11000000" // /* MW 3 */
+ 12134 "00000111" // /* MW 2 */
+ 12135 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 24 first
+ 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "01010110" // /* MW 3 */
+ 12138 "00000110" // /* MW 2 */
+ 12139 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12141 "00000000" // /* MW 1 */
+.swstall __RAW__R_1948
+ 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12143 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 4
+ 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12145 "00000000" // /* MW 3 */
+ 12146 "00101000" // /* MW 2 */
+ 12147 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12149 "00001101" // /* MW 3 */
+ 12150 "00100010" // /* MW 2 */
+ 12151 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12153 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12155 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 831 44
+.delay_slot
+ 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12157 "00100111" // /* MW 3 */
+ 12158 "01100011" // /* MW 2 */
+ 12159 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12161 "00000000" // /* MW 15 */
+ 12162 "00000000" // /* MW 14 */
+ 12163 "01111000" // /* MW 13 */
+ 12164 "10100101" // /* MW 12 */
+ 12165 "00000001" // /* MW 11 */
+ 12166 "01101100" // /* MW 10 */
+ 12167 "00001000" // /* MW 9 */
+ 12168 "00100010" // /* MW 8 */
+ 12169 "01011011" // /* MW 7 */
+ 12170 "00000001" // /* MW 6 */
+ 12171 "00100000" // /* MW 5 */
+ 12172 "00000000" // /* MW 4 */
+ 12173 "11110000" // /* MW 3 */
+ 12174 "00101100" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_512
+.src_ref 10 "softfloat.c" 834 8 first
+ 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */
+ 12177 "00000001" // /* MW 5 */
+ 12178 "01000000" // /* MW 4 */
+ 12179 "00101000" // /* MW 3 */
+ 12180 "00011000" // /* MW 2 */
+ 12181 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12187 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12191 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31
+ 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12193 "01111101" // /* MW 3 */
+ 12194 "00100000" // /* MW 2 */
+ 12195 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 835 8 first
+ 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12197 "00000000" // /* MW 3 */
+ 12198 "00101000" // /* MW 2 */
+ 12199 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 31 first
+.delay_slot
+ 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12201 "00001101" // /* MW 3 */
+ 12202 "11100001" // /* MW 2 */
+ 12203 "00010001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12205 "00000000" // /* MW 5 */
+ 12206 "10100000" // /* MW 4 */
+ 12207 "00001000" // /* MW 3 */
+ 12208 "10000000" // /* MW 2 */
+ 12209 "01111111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 128 38
+.delay_slot
+ 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12211 "00000000" // /* MW 3 */
+ 12212 "01000001" // /* MW 2 */
+ 12213 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12215 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 12217 "00011100" // /* MW 7 */
+ 12218 "00000000" // /* MW 6 */
+ 12219 "00000000" // /* MW 5 */
+ 12220 "00000100" // /* MW 4 */
+ 12221 "11110000" // /* MW 3 */
+ 12222 "00101100" // /* MW 2 */
+ 12223 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_560
+.src_ref 10 "softfloat.c" 852 8 first
+ 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */
+ 12225 "00000001" // /* MW 5 */
+ 12226 "01000000" // /* MW 4 */
+ 12227 "00110000" // /* MW 3 */
+ 12228 "00011000" // /* MW 2 */
+ 12229 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12233 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12235 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12237 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12239 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 853 8 first
+ 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12241 "00000000" // /* MW 3 */
+ 12242 "00101000" // /* MW 2 */
+ 12243 "00010000" // /* MW 1 */
+.delay_slot
+ 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "10100000" // /* MW 3 */
+ 12246 "00010000" // /* MW 2 */
+ 12247 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12249 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12251 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_592
+.src_ref 10 "softfloat.c" 821 18 first
+ 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12257 "00000101" // /* MW 3 */
+ 12258 "01100001" // /* MW 2 */
+ 12259 "00010100" // /* MW 1 */
+.src_ref 10 "softfloat.c" 821 8
+ 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */
+ 12261 "00000001" // /* MW 5 */
+ 12262 "01000000" // /* MW 4 */
+ 12263 "00111000" // /* MW 3 */
+ 12264 "00011000" // /* MW 2 */
+ 12265 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12267 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12269 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12271 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12275 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 823 8 first
+ 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12277 "00000000" // /* MW 3 */
+ 12278 "00101000" // /* MW 2 */
+ 12279 "00010000" // /* MW 1 */
+.delay_slot
+ 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12281 "11111110" // /* MW 5 */
+ 12282 "00111111" // /* MW 4 */
+ 12283 "11110000" // /* MW 3 */
+ 12284 "11111111" // /* MW 2 */
+ 12285 "01111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12289 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "10000001" // /* MW 11 */
+ 12294 "10101101" // /* MW 10 */
+ 12295 "00000000" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_640
+ 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */
+ 12305 "00000000" // /* MW 5 */
+ 12306 "00000000" // /* MW 4 */
+ 12307 "01111000" // /* MW 3 */
+ 12308 "00010111" // /* MW 2 */
+ 12309 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12311 "10100000" // /* MW 3 */
+ 12312 "01010001" // /* MW 2 */
+ 12313 "00011000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 864 16
+.delay_slot
+ 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12315 "10100000" // /* MW 3 */
+ 12316 "11011000" // /* MW 2 */
+ 12317 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12319 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12321 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12323 "00011100" // /* MW 13 */
+ 12324 "00000000" // /* MW 12 */
+ 12325 "00000000" // /* MW 11 */
+ 12326 "01010111" // /* MW 10 */
+ 12327 "00011010" // /* MW 9 */
+ 12328 "01000000" // /* MW 8 */
+ 12329 "00000000" // /* MW 7 */
+ 12330 "00000000" // /* MW 6 */
+ 12331 "10110110" // /* MW 5 */
+ 12332 "00000010" // /* MW 4 */
+ 12333 "11110000" // /* MW 3 */
+ 12334 "00101100" // /* MW 2 */
+ 12335 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_672
+ 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */
+ 12337 "00000000" // /* MW 5 */
+ 12338 "00000000" // /* MW 4 */
+ 12339 "00110000" // /* MW 3 */
+ 12340 "00010111" // /* MW 2 */
+ 12341 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 846 16
+.delay_slot
+ 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12343 "00100000" // /* MW 3 */
+ 12344 "00011000" // /* MW 2 */
+ 12345 "00011001" // /* MW 1 */
+.src_ref 10 "softfloat.c" 867 4
+.delay_slot
+ 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12347 "00100000" // /* MW 3 */
+ 12348 "01011100" // /* MW 2 */
+ 12349 "00011110" // /* MW 1 */
+.src_ref 10 "softfloat.c" 868 11
+.delay_slot
+ 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12351 "10100000" // /* MW 3 */
+ 12352 "01010011" // /* MW 2 */
+ 12353 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12357 "10000001" // /* MW 11 */
+ 12358 "10101101" // /* MW 10 */
+ 12359 "00000000" // /* MW 9 */
+ 12360 "00000000" // /* MW 8 */
+ 12361 "00000000" // /* MW 7 */
+ 12362 "00000000" // /* MW 6 */
+ 12363 "00100000" // /* MW 5 */
+ 12364 "00000000" // /* MW 4 */
+ 12365 "11110000" // /* MW 3 */
+ 12366 "00101100" // /* MW 2 */
+ 12367 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_704
+.src_ref 10 "softfloat.c" 834 27 first
+.tail_call
+ 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12369 "00000000" // /* MW 5 */
+ 12370 "00000000" // /* MW 4 */
+ 12371 "01000000" // /* MW 3 */
+ 12372 "00010100" // /* MW 2 */
+ 12373 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12375 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12377 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12379 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12383 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_720
+.src_ref 10 "softfloat.c" 852 27 first
+.tail_call
+.return_address
+ 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12385 "00000000" // /* MW 5 */
+ 12386 "00000000" // /* MW 4 */
+ 12387 "01000000" // /* MW 3 */
+ 12388 "00010100" // /* MW 2 */
+ 12389 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12391 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12393 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12395 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12399 "00000000" // /* MW 1 */
+.label TGT_F_ZL14subFloat32Sigsjji_736
+.src_ref 10 "softfloat.c" 821 34 first
+.tail_call
+.return_address
+ 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */
+ 12401 "00000000" // /* MW 5 */
+ 12402 "00000000" // /* MW 4 */
+ 12403 "01000000" // /* MW 3 */
+ 12404 "00010100" // /* MW 2 */
+ 12405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL14subFloat32Sigsjji__end
+ 12415 "00000000" // /* MW 1 */
+.label float32_add
+.function float32_add float32_add
+.src_ref 10 "softfloat.c" 92 12
+.src_ref 10 "softfloat.c" 878 first
+.function_start
+ 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12417 "10000101" // /* MW 3 */
+ 12418 "11100000" // /* MW 2 */
+ 12419 "00010111" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12 first
+ 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12421 "00001101" // /* MW 3 */
+ 12422 "01000111" // /* MW 2 */
+ 12423 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 92 12
+ 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00001101" // /* MW 3 */
+ 12426 "10100001" // /* MW 2 */
+ 12427 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 15 first
+ 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00000111" // /* MW 3 */
+ 12430 "11100001" // /* MW 2 */
+ 12431 "00010000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 884 4
+ 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */
+ 12433 "00000001" // /* MW 5 */
+ 12434 "01000000" // /* MW 4 */
+ 12435 "01011000" // /* MW 3 */
+ 12436 "00011000" // /* MW 2 */
+ 12437 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+.src_ref 10 "softfloat.c" 888 15 first
+.tail_call
+ 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */
+ 12449 "00000000" // /* MW 5 */
+ 12450 "00000000" // /* MW 4 */
+ 12451 "11001000" // /* MW 3 */
+ 12452 "00010110" // /* MW 2 */
+ 12453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12455 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12461 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12463 "00000000" // /* MW 1 */
+.label TGT_Ffloat32_add_48
+.src_ref 10 "softfloat.c" 885 15 first
+.tail_call
+.return_address
+ 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */
+ 12465 "00000000" // /* MW 5 */
+ 12466 "00000000" // /* MW 4 */
+ 12467 "10010000" // /* MW 3 */
+ 12468 "00010101" // /* MW 2 */
+ 12469 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12471 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12473 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12475 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label float32_add__end
+ 12479 "00000000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2d1d5946a6747db932adeab9e7d141d4fd318d32
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/Release/3_3_reloadable14.txt
@@ -0,0 +1,2975 @@
+Contents of the .debug_line section:
+
+sigmoid_carf_templated_lut.h:
+File name Line number Starting address View Stmt
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 218 0xa10 x
+reduce_base_c8.h 220 0xa10 1 x
+reduce_base_c8.h 290 0xa10 2
+reduce_base_c8.h 348 0xa10 3
+reduce_base_c8.h 287 0xa1c
+reduce_base_c8.h 287 0xa1c 1
+reduce_base_c8.h 348 0xa1c 2 x
+reduce_base_c8.h 236 0xa26
+reduce_base_c8.h 293 0xa26 1
+reduce_base_c8.h 298 0xa26 2
+reduce_base_c8.h 299 0xa26 3
+reduce_base_c8.h 300 0xa26 4
+reduce_base_c8.h 326 0xa26 5
+reduce_base_c8.h 276 0xa30
+reduce_base_c8.h 301 0xa30 1
+reduce_base_c8.h 305 0xa30 2
+reduce_base_c8.h 218 0xa3a
+reduce_base_c8.h 280 0xa3a 1
+reduce_base_c8.h 312 0xa3a 2
+reduce_base_c8.h 298 0xa44 x
+reduce_base_c8.h 220 0xa4a x
+reduce_base_c8.h 221 0xa4e x
+reduce_base_c8.h 221 0xa5e
+reduce_base_c8.h 301 0xa5e 1 x
+reduce_base_c8.h 222 0xa64 x
+reduce_base_c8.h 293 0xa64 1 x
+reduce_base_c8.h 301 0xa64 2
+reduce_base_c8.h 290 0xa6e x
+reduce_base_c8.h 293 0xa72 x
+reduce_base_c8.h 290 0xa76 x
+reduce_base_c8.h 300 0xa76 1 x
+reduce_base_c8.h 222 0xa82 x
+reduce_base_c8.h 287 0xa82 1 x
+reduce_base_c8.h 223 0xa88 x
+reduce_base_c8.h 312 0xa88 1 x
+reduce_base_c8.h 305 0xa92 x
+reduce_base_c8.h 312 0xa96 x
+reduce_base_c8.h 299 0xa9a x
+reduce_base_c8.h 276 0xa9e x
+reduce_base_c8.h 299 0xa9e 1
+reduce_base_c8.h 276 0xaa4
+reduce_base_c8.h 301 0xaa8 x
+reduce_base_c8.h 223 0xaac x
+reduce_base_c8.h 236 0xaac 1 x
+reduce_base_c8.h 224 0xab2 x
+reduce_base_c8.h 224 0xac2
+reduce_base_c8.h 318 0xac2 1
+reduce_base_c8.h 225 0xaca x
+reduce_base_c8.h 225 0xada
+reduce_base_c8.h 318 0xada 1
+reduce_base_c8.h 226 0xae2 x
+reduce_base_c8.h 236 0xae8 x
+reduce_base_c8.h 312 0xaee x
+reduce_base_c8.h 318 0xaf2 x
+reduce_base_c8.h 300 0xaf6 x
+reduce_base_c8.h 305 0xaf6 1 x
+reduce_base_c8.h 280 0xafc x
+reduce_base_c8.h 226 0xb00 x
+reduce_base_c8.h 318 0xb00 1 x
+reduce_base_c8.h 236 0xb06
+reduce_base_c8.h 236 0xb0a x
+reduce_base_c8.h 236 0xb0e
+reduce_base_c8.h 242 0xb1c x
+reduce_base_c8.h 236 0xb20
+reduce_base_c8.h 236 0xb24 x
+reduce_base_c8.h 236 0xb28
+reduce_base_c8.h 236 0xb36
+reduce_base_c8.h 236 0xb3a
+reduce_base_c8.h 236 0xb3e
+reduce_base_c8.h 329 0xb54
+reduce_base_c8.h 236 0xb60
+reduce_base_c8.h 236 0xb64
+reduce_base_c8.h 236 0xb68
+reduce_base_c8.h 236 0xb76
+reduce_base_c8.h 316 0xb76 1
+reduce_base_c8.h 329 0xb76 2
+reduce_base_c8.h 236 0xb7a
+reduce_base_c8.h 236 0xb7e
+reduce_base_c8.h 236 0xb8e
+reduce_base_c8.h 236 0xb92
+reduce_base_c8.h 286 0xba2 x
+reduce_base_c8.h 289 0xba2 1
+reduce_base_c8.h 291 0xba2 2
+reduce_base_c8.h 291 0xba2 3
+reduce_base_c8.h 287 0xbba x
+reduce_base_c8.h 288 0xbca x
+reduce_base_c8.h 289 0xbda x
+reduce_base_c8.h 290 0xbea x
+reduce_base_c8.h 291 0xbfa x
+reduce_base_c8.h 292 0xc0e x
+reduce_base_c8.h 293 0xc12 x
+reduce_base_c8.h 274 0xc20 x
+reduce_base_c8.h 275 0xc20 1
+reduce_base_c8.h 275 0xc20 2
+reduce_base_c8.h 275 0xc2a x
+reduce_base_c8.h 279 0xc2a 1
+reduce_base_c8.h 275 0xc3e
+reduce_base_c8.h 276 0xc4e x
+reduce_base_c8.h 275 0xc5e x
+reduce_base_c8.h 277 0xc5e 1 x
+reduce_base_c8.h 278 0xc6e x
+reduce_base_c8.h 279 0xc7e x
+reduce_base_c8.h 279 0xc8c
+reduce_base_c8.h 281 0xc94 x
+reduce_base_c8.h 280 0xc98 x
+reduce_base_c8.h 236 0xca0
+reduce_base_c8.h 301 0xca0 1
+reduce_base_c8.h 302 0xca0 2
+reduce_base_c8.h 236 0xca6 x
+reduce_base_c8.h 236 0xcaa
+reduce_base_c8.h 298 0xcb0
+reduce_base_c8.h 303 0xcb0 1
+reduce_base_c8.h 310 0xcb0 2
+reduce_base_c8.h 311 0xcb0 3
+reduce_base_c8.h 236 0xcbc
+reduce_base_c8.h 236 0xcc0
+reduce_base_c8.h 236 0xcc4
+reduce_base_c8.h 310 0xcd4 x
+reduce_base_c8.h 312 0xcd4 1 x
+reduce_base_c8.h 315 0xcd4 2
+reduce_base_c8.h 313 0xcde
+reduce_base_c8.h 317 0xcde 1
+reduce_base_c8.h 315 0xce8
+reduce_base_c8.h 317 0xce8 1 x
+reduce_base_c8.h 311 0xcf6 x
+reduce_base_c8.h 312 0xd06 x
+reduce_base_c8.h 313 0xd16 x
+reduce_base_c8.h 315 0xd1a x
+reduce_base_c8.h 316 0xd2a x
+reduce_base_c8.h 317 0xd2e x
+reduce_base_c8.h 298 0xd50 x
+reduce_base_c8.h 301 0xd50 1
+reduce_base_c8.h 301 0xd50 2 x
+reduce_base_c8.h 302 0xd5a
+reduce_base_c8.h 303 0xd5a 1
+reduce_base_c8.h 306 0xd5a 2
+reduce_base_c8.h 302 0xd64 x
+reduce_base_c8.h 302 0xd68
+reduce_base_c8.h 306 0xd68 1 x
+reduce_base_c8.h 299 0xd74 x
+reduce_base_c8.h 300 0xd84 x
+reduce_base_c8.h 301 0xd94 x
+reduce_base_c8.h 302 0xda4 x
+reduce_base_c8.h 303 0xdb4 x
+reduce_base_c8.h 304 0xdc4 x
+reduce_base_c8.h 305 0xdc8 x
+reduce_base_c8.h 326 0xde0 x
+reduce_base_c8.h 329 0xde0 1
+reduce_base_c8.h 329 0xde6
+reduce_base_c8.h 330 0xde6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 139 0xde6 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 331 0xdf0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 140 0xdf0 1 x
+reduce_mean_c8_impl.h 141 0xdf6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 326 0xdfe x
+reduce_base_c8.h 327 0xe0e x
+reduce_base_c8.h 327 0xe1a
+reduce_base_c8.h 328 0xe1a 1
+reduce_base_c8.h 328 0xe20 x
+reduce_base_c8.h 329 0xe24 x
+reduce_base_c8.h 329 0xe32
+reduce_base_c8.h 329 0xe36
+reduce_base_c8.h 330 0xe36 1
+reduce_base_c8.h 329 0xe3c
+reduce_base_c8.h 330 0xe48 x
+reduce_base_c8.h 331 0xe58 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 139 0xe68 x
+reduce_mean_c8_impl.h 140 0xe78 x
+reduce_mean_c8_impl.h 141 0xe88 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 349 0xe8c x
+reduce_base_c8.h 349 0xe90
+reduce_base_c8.h 262 0xea0 x
+reduce_base_c8.h 263 0xea0 1
+reduce_base_c8.h 263 0xeaa
+reduce_base_c8.h 263 0xeaa 1 x
+reduce_base_c8.h 267 0xeaa 2
+reduce_base_c8.h 265 0xeb4
+reduce_base_c8.h 329 0xeb4 1
+reduce_base_c8.h 265 0xebe x
+reduce_base_c8.h 265 0xec2
+reduce_base_c8.h 267 0xec2 1 x
+reduce_base_c8.h 265 0xec6 x
+reduce_base_c8.h 265 0xec6 1 x
+reduce_base_c8.h 263 0xecc x
+reduce_base_c8.h 263 0xed0
+reduce_base_c8.h 264 0xede x
+reduce_base_c8.h 265 0xeee x
+reduce_base_c8.h 266 0xefe x
+reduce_base_c8.h 267 0xf0e x
+reduce_base_c8.h 267 0xf1c
+reduce_base_c8.h 267 0xf20
+reduce_base_c8.h 270 0xf24
+reduce_base_c8.h 268 0xf28 x
+reduce_base_c8.h 269 0xf30 x
+reduce_base_c8.h 270 0xf30 1 x
+reduce_base_c8.h 250 0xf40
+reduce_base_c8.h 250 0xf40 1 x
+reduce_base_c8.h 255 0xf40 2
+reduce_base_c8.h 255 0xf4a
+reduce_base_c8.h 255 0xf4a 1
+reduce_base_c8.h 255 0xf4a 2
+reduce_base_c8.h 255 0xf4a 3 x
+reduce_base_c8.h 255 0xf54
+reduce_base_c8.h 255 0xf54 1
+reduce_base_c8.h 329 0xf54 2
+reduce_base_c8.h 251 0xf62 x
+reduce_base_c8.h 252 0xf72 x
+reduce_base_c8.h 253 0xf82 x
+reduce_base_c8.h 254 0xf92 x
+reduce_base_c8.h 255 0xfa2 x
+reduce_base_c8.h 255 0xfb0
+reduce_base_c8.h 255 0xfb0 1
+reduce_base_c8.h 256 0xfb8 x
+reduce_base_c8.h 257 0xfbc x
+reduce_base_c8.h 238 0xfc0 x
+reduce_base_c8.h 239 0xfd0 x
+reduce_base_c8.h 240 0xfe0 x
+reduce_base_c8.h 241 0xfea
+reduce_base_c8.h 241 0xfea 1
+reduce_base_c8.h 241 0xff2 x
+reduce_base_c8.h 241 0xff8
+reduce_base_c8.h 241 0xffe
+reduce_base_c8.h 241 0x1002
+reduce_base_c8.h 241 0x1002 1
+reduce_base_c8.h 241 0x1002 2
+reduce_base_c8.h 241 0x1002 3
+reduce_base_c8.h 242 0x100c x
+reduce_base_c8.h 243 0x101a
+reduce_base_c8.h 243 0x101e x
+reduce_base_c8.h 243 0x102c
+reduce_base_c8.h 243 0x102c 1
+reduce_base_c8.h 243 0x102c 2
+reduce_base_c8.h 243 0x102c 3
+reduce_base_c8.h 244 0x1036 x
+reduce_base_c8.h 245 0x103a x
+reduce_base_c8.h 329 0x103a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 266 0x1050 x
+pad_3d.h 465 0x1050 1 x
+pad_3d.h 468 0x1050 2 x
+pad_3d.h 471 0x1050 3
+pad_3d.h 479 0x1050 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x105a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 469 0x105a 1 x
+pad_3d.h 478 0x105a 2
+pad_3d.h 499 0x105a 3
+pad_3d.h 511 0x105a 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x1064
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 470 0x1064 1 x
+pad_3d.h 486 0x1064 2
+pad_3d.h 498 0x1064 3
+pad_3d.h 499 0x1064 4
+pad_3d.h 509 0x1064 5
+pad_3d.h 517 0x1064 6
+pad_3d.h 471 0x106e x
+pad_3d.h 472 0x1072 x
+pad_3d.h 473 0x1076 x
+pad_3d.h 475 0x107a x
+pad_3d.h 479 0x107e x
+pad_3d.h 477 0x1082 x
+pad_3d.h 478 0x1086 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x108a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 485 0x1090 x
+pad_3d.h 485 0x1094
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 998 0x1098 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 486 0x109c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp:
+array_helpers.hpp 950 0x10a0 x
+array_helpers.hpp 950 0x10a4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 486 0x10a4 1 x
+pad_3d.h 486 0x10aa
+pad_3d.h 487 0x10b0
+pad_3d.h 486 0x10bc
+pad_3d.h 486 0x10c2
+pad_3d.h 486 0x10c8
+pad_3d.h 487 0x1130 x
+pad_3d.h 495 0x1140
+pad_3d.h 495 0x1140 1 x
+pad_3d.h 498 0x1140 2
+pad_3d.h 499 0x1140 3 x
+pad_3d.h 495 0x114a
+pad_3d.h 496 0x114a 1 x
+pad_3d.h 495 0x1150 x
+pad_3d.h 495 0x1154
+pad_3d.h 498 0x1154 1 x
+pad_3d.h 499 0x115a x
+pad_3d.h 498 0x115e x
+pad_3d.h 498 0x1162
+pad_3d.h 499 0x1162 1 x
+pad_3d.h 499 0x1168
+pad_3d.h 499 0x116c
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x117c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x117c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 499 0x117c 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1186
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1186 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 499 0x1186 2
+pad_3d.h 499 0x1190
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1200 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1200 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 514 0x1210
+pad_3d.h 514 0x1216 x
+pad_3d.h 514 0x121a
+pad_3d.h 514 0x121e
+pad_3d.h 511 0x1222 x
+pad_3d.h 509 0x1226 x
+pad_3d.h 515 0x122a x
+pad_3d.h 509 0x122e x
+pad_3d.h 509 0x1232
+pad_3d.h 514 0x1232 1
+pad_3d.h 517 0x1232 2 x
+pad_3d.h 509 0x1238 x
+pad_3d.h 509 0x123c
+pad_3d.h 517 0x123c 1 x
+pad_3d.h 517 0x1242
+pad_3d.h 514 0x124c x
+pad_3d.h 514 0x1250
+pad_3d.h 515 0x1254 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1258
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1258 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 517 0x1258 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x1262
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1262 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 517 0x1262 2
+pad_3d.h 517 0x126c
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x12d0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x12d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 282 0x12e0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 352 0x12f0
+reduce_base_c8.h 362 0x12f0 1 x
+reduce_base_c8.h 365 0x12f0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x12f0 3
+reduce_mean_c8_impl.h 223 0x12f0 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 365 0x12f4 x
+reduce_base_c8.h 367 0x12fc x
+reduce_base_c8.h 367 0x130c
+reduce_base_c8.h 367 0x130c 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 101 0x1312
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1312 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1312 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 372 0x1312 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1316 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 362 0x131e
+reduce_base_c8.h 372 0x1324
+reduce_base_c8.h 372 0x1328 x
+reduce_base_c8.h 372 0x1338
+reduce_base_c8.h 372 0x133c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1342
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 374 0x1342 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x134e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 372 0x134e 1
+reduce_base_c8.h 374 0x134e 2
+reduce_base_c8.h 372 0x135a
+reduce_base_c8.h 372 0x1360
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x13d0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 374 0x13d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x13e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x13e0 1
+reduce_base_c8.h 412 0x13e0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x13e0 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x13e4 x
+reduce_base_c8.h 388 0x13e8
+reduce_base_c8.h 388 0x13e8 1
+reduce_base_c8.h 388 0x13ee
+reduce_base_c8.h 570 0x13ee 1
+reduce_base_c8.h 570 0x13ee 2
+reduce_base_c8.h 570 0x13ee 3
+reduce_base_c8.h 570 0x13f4 x
+reduce_base_c8.h 594 0x13f4 1
+reduce_base_c8.h 570 0x13fa
+reduce_base_c8.h 594 0x13fa 1 x
+reduce_base_c8.h 594 0x1400
+reduce_base_c8.h 594 0x1404
+reduce_base_c8.h 388 0x1408
+reduce_base_c8.h 595 0x1408 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x140e
+aie_core.h 73 0x140e 1
+aie_core.h 90 0x140e 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x140e 3
+vector.hpp 1139 0x140e 4
+vector.hpp 1159 0x140e 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x140e 6
+accum.hpp 198 0x140e 7
+accum.hpp 198 0x140e 8
+accum.hpp 943 0x140e 9
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x140e 10
+reduce_base_c8.h 596 0x140e 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1418
+aie_core.h 90 0x1418 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1418 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1418 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x1418 4 x
+reduce_base_c8.h 570 0x1418 5
+reduce_base_c8.h 570 0x1418 6
+reduce_base_c8.h 570 0x1418 7
+reduce_base_c8.h 570 0x1418 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1424
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 109 0x1424 1
+me_vmult_float_emulated.h 109 0x1424 2
+me_vmult_float_emulated.h 111 0x1424 3
+me_vmult_float_emulated.h 111 0x1424 4
+me_vmult_float_emulated.h 113 0x1424 5
+me_vmult_float_emulated.h 113 0x1424 6
+me_vmult_float_emulated.h 115 0x1424 7
+me_vmult_float_emulated.h 115 0x1424 8
+me_vmult_float_emulated.h 117 0x1424 9
+me_vmult_float_emulated.h 117 0x1424 10
+me_vmult_float_emulated.h 118 0x1424 11
+me_vmult_float_emulated.h 118 0x1424 12
+me_vmult_float_emulated.h 118 0x1424 13
+me_vmult_float_emulated.h 118 0x1424 14
+me_vmult_float_emulated.h 119 0x1424 15
+me_vmult_float_emulated.h 119 0x1424 16
+me_vmult_float_emulated.h 119 0x1424 17
+me_vmult_float_emulated.h 119 0x1424 18
+me_vmult_float_emulated.h 120 0x1424 19
+me_vmult_float_emulated.h 120 0x1424 20
+me_vmult_float_emulated.h 120 0x1424 21
+me_vmult_float_emulated.h 120 0x1424 22
+me_vmult_float_emulated.h 121 0x1424 23
+me_vmult_float_emulated.h 121 0x1424 24
+me_vmult_float_emulated.h 121 0x1424 25
+me_vmult_float_emulated.h 121 0x1424 26
+me_vmult_float_emulated.h 122 0x1424 27
+me_vmult_float_emulated.h 122 0x1424 28
+me_vmult_float_emulated.h 122 0x1424 29
+me_vmult_float_emulated.h 122 0x1424 30
+me_vmult_float_emulated.h 123 0x1424 31
+me_vmult_float_emulated.h 123 0x1424 32
+me_vmult_float_emulated.h 123 0x1424 33
+me_vmult_float_emulated.h 123 0x1424 34
+me_vmult_float_emulated.h 124 0x1424 35
+me_vmult_float_emulated.h 124 0x1424 36
+me_vmult_float_emulated.h 124 0x1424 37
+me_vmult_float_emulated.h 124 0x1424 38
+me_vmult_float_emulated.h 125 0x1424 39
+me_vmult_float_emulated.h 125 0x1424 40
+me_vmult_float_emulated.h 125 0x1424 41
+me_vmult_float_emulated.h 125 0x1424 42
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1424 43
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1424 44
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x1424 45
+add.hpp 28 0x1424 46
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1424 47
+add_reduce.hpp 324 0x1424 48
+add_reduce.hpp 324 0x1424 49
+add_reduce.hpp 324 0x1424 50
+add_reduce.hpp 324 0x1424 51
+add_reduce.hpp 324 0x1424 52
+add_reduce.hpp 324 0x1424 53
+add_reduce.hpp 324 0x1424 54
+add_reduce.hpp 324 0x1424 55
+add_reduce.hpp 324 0x1424 56
+add_reduce.hpp 324 0x1424 57
+add_reduce.hpp 324 0x1424 58
+add_reduce.hpp 324 0x1424 59
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1424 60
+add_accum.hpp 19 0x1424 61
+add_accum.hpp 19 0x1424 62
+add_accum.hpp 19 0x1424 63
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 388 0x1424 64
+reduce_base_c8.h 595 0x1424 65 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1430
+aie_core.h 73 0x1430 1
+aie_core.h 73 0x1430 2
+aie_core.h 73 0x1430 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1430 4
+vector.hpp 1139 0x1430 5
+vector.hpp 1139 0x1430 6
+vector.hpp 1159 0x1430 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x1430 8
+accum.hpp 198 0x1430 9
+accum.hpp 198 0x1430 10
+accum.hpp 198 0x1430 11
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1430 12 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x1430 13
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x143c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x143c 1
+vector.hpp 1139 0x143c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x143c 3
+accum.hpp 198 0x143c 4 x
+accum.hpp 943 0x143c 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x143c 6
+reduce_base_c8.h 570 0x143c 7
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1446 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1446 1
+vector.hpp 1139 0x1446 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1446 3
+accum.hpp 198 0x1446 4
+accum.hpp 943 0x1446 5
+accum.hpp 943 0x1446 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1446 7
+reduce_base_c8.h 570 0x1446 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1450
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x1450 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1456
+aie_core.h 90 0x1456 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1456 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1456 3
+accum.hpp 943 0x1456 4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x145c
+aie_core.h 90 0x145c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x145c 2
+vector.hpp 1139 0x145c 3
+vector.hpp 1139 0x145c 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x145c 5
+accum.hpp 198 0x145c 6
+accum.hpp 198 0x145c 7 x
+accum.hpp 943 0x145c 8
+accum.hpp 943 0x145c 9
+accum.hpp 943 0x145c 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x145c 11 x
+reduce_base_c8.h 570 0x145c 12 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1468
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1468 1
+vector.hpp 1139 0x1468 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1468 3
+accum.hpp 198 0x1468 4
+accum.hpp 943 0x1468 5
+accum.hpp 943 0x1468 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1468 7 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1468 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x1472
+aie_core.h 90 0x1472 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1472 2
+vector.hpp 1139 0x1472 3
+vector.hpp 1139 0x1472 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1472 5
+accum.hpp 198 0x1472 6
+accum.hpp 198 0x1472 7 x
+accum.hpp 943 0x1472 8
+accum.hpp 943 0x1472 9
+accum.hpp 943 0x1472 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1472 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x147c
+aie_core.h 90 0x147c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x147c 2
+vector.hpp 1159 0x147c 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x147c 4
+accum.hpp 198 0x147c 5
+accum.hpp 943 0x147c 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1482 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1482 1 x
+accum.hpp 943 0x1482 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1482 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1482 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x148a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x148a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x148a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1490 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x1490 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x149a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x149a 1 x
+accum.hpp 943 0x149a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x149a 3
+reduce_base_c8.h 570 0x149a 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14a0
+aie_core.h 73 0x14a0 1
+aie_core.h 73 0x14a0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x14a0 3
+vector.hpp 1159 0x14a0 4
+vector.hpp 1159 0x14a0 5
+vector.hpp 1285 0x14a0 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x14a0 7
+accum.hpp 153 0x14a0 8
+accum.hpp 153 0x14a0 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x14a0 10
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14a0 11 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14b0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14b0 1 x
+vector.hpp 1159 0x14b0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x14b0 3
+accum.hpp 198 0x14b0 4 x
+accum.hpp 943 0x14b0 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14b0 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 391 0x14b0 7 x
+reduce_base_c8.h 570 0x14b0 8 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x14c0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14c0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14c0 2
+accum.hpp 943 0x14c0 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x14c4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14c4 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14c4 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14d0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x14d0 1
+accum.hpp 943 0x14d0 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x14d0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x14f0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x1500 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1500 1 x
+vector.hpp 1159 0x1500 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x1500 3 x
+accum.hpp 198 0x1500 4 x
+accum.hpp 943 0x1500 5 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1500 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1510 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1520 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1520 1 x
+accum.hpp 943 0x1520 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1520 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 570 0x1520 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 107 0x1530
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x1530 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 90 0x153a x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 107 0x153a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x153a 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x153a 3 x
+accum.hpp 943 0x153a 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x153a 5 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 101 0x1544 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x154a x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x154e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x154e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x154e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1554 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 412 0x1554 1 x
+reduce_base_c8.h 412 0x155c
+reduce_base_c8.h 412 0x1560
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 73 0x156c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x156c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 153 0x156c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1572 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1572 1 x
+reduce_mean_c8_impl.h 184 0x1572 2
+reduce_mean_c8_impl.h 184 0x1584
+reduce_mean_c8_impl.h 184 0x1588
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x158e
+add_reduce.hpp 322 0x158e 1
+add_reduce.hpp 322 0x158e 2
+add_reduce.hpp 322 0x158e 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x158e 4
+reduce_mean_c8_impl.h 184 0x159a
+reduce_mean_c8_impl.h 184 0x159e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x15ae
+blend.hpp 170 0x15b4
+blend.hpp 163 0x15ba
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 109 0x15c0
+me_vmult_float_emulated.h 111 0x15c0 1
+me_vmult_float_emulated.h 113 0x15c0 2
+me_vmult_float_emulated.h 115 0x15c0 3
+me_vmult_float_emulated.h 117 0x15c0 4
+me_vmult_float_emulated.h 118 0x15c0 5
+me_vmult_float_emulated.h 118 0x15c0 6
+me_vmult_float_emulated.h 119 0x15c0 7
+me_vmult_float_emulated.h 119 0x15c0 8
+me_vmult_float_emulated.h 120 0x15c0 9
+me_vmult_float_emulated.h 120 0x15c0 10
+me_vmult_float_emulated.h 121 0x15c0 11
+me_vmult_float_emulated.h 121 0x15c0 12
+me_vmult_float_emulated.h 122 0x15c0 13
+me_vmult_float_emulated.h 122 0x15c0 14
+me_vmult_float_emulated.h 123 0x15c0 15
+me_vmult_float_emulated.h 123 0x15c0 16
+me_vmult_float_emulated.h 124 0x15c0 17
+me_vmult_float_emulated.h 124 0x15c0 18
+me_vmult_float_emulated.h 125 0x15c0 19
+me_vmult_float_emulated.h 125 0x15c0 20
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x15c0 21
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x15c0 22
+add_reduce.hpp 324 0x15c0 23
+add_reduce.hpp 324 0x15c0 24
+add_reduce.hpp 324 0x15c0 25
+add_reduce.hpp 324 0x15c0 26
+add_reduce.hpp 324 0x15c0 27
+add_reduce.hpp 324 0x15c0 28
+add_reduce.hpp 324 0x15c0 29
+add_reduce.hpp 324 0x15c0 30
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x15c0 31
+reduce_mean_c8_impl.h 200 0x15c0 32
+reduce_mean_c8_impl.h 200 0x15c0 33
+reduce_mean_c8_impl.h 223 0x15c0 34
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x15cc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 200 0x15cc 1 x
+reduce_mean_c8_impl.h 200 0x15e0
+reduce_mean_c8_impl.h 223 0x15f0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x15fc
+vector.hpp 1289 0x15fc 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x1608
+me_vmult_float_emulated.h 112 0x1608 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1608 2
+vector.hpp 57 0x1608 3
+vector.hpp 1280 0x1608 4
+vector.hpp 1285 0x1608 5
+vector.hpp 1287 0x1608 6
+vector.hpp 1288 0x1608 7
+vector.hpp 1289 0x1608 8
+vector.hpp 1292 0x1608 9
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x1608 10 x
+reduce_mean_c8_impl.h 268 0x1608 11
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1614
+vector.hpp 915 0x1614 1
+vector.hpp 1280 0x1614 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x161e
+add_reduce.hpp 322 0x161e 1
+add_reduce.hpp 322 0x161e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x161e 3 x
+reduce_mean_c8_impl.h 223 0x1628
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x1632
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1638
+me_vmult_float_emulated.h 108 0x1638 1
+me_vmult_float_emulated.h 109 0x1638 2
+me_vmult_float_emulated.h 110 0x1638 3
+me_vmult_float_emulated.h 110 0x1638 4
+me_vmult_float_emulated.h 111 0x1638 5
+me_vmult_float_emulated.h 111 0x1638 6
+me_vmult_float_emulated.h 111 0x1638 7
+me_vmult_float_emulated.h 112 0x1638 8
+me_vmult_float_emulated.h 112 0x1638 9
+me_vmult_float_emulated.h 113 0x1638 10
+me_vmult_float_emulated.h 114 0x1638 11
+me_vmult_float_emulated.h 114 0x1638 12
+me_vmult_float_emulated.h 115 0x1638 13
+me_vmult_float_emulated.h 115 0x1638 14
+me_vmult_float_emulated.h 115 0x1638 15
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x1638 16
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x1638 17
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x163c x
+me_vmult_float_emulated.h 112 0x163c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 223 0x163c 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1644
+me_vmult_float_emulated.h 108 0x1644 1
+me_vmult_float_emulated.h 109 0x1644 2
+me_vmult_float_emulated.h 110 0x1644 3
+me_vmult_float_emulated.h 110 0x1644 4
+me_vmult_float_emulated.h 111 0x1644 5
+me_vmult_float_emulated.h 111 0x1644 6
+me_vmult_float_emulated.h 111 0x1644 7
+me_vmult_float_emulated.h 113 0x1644 8
+me_vmult_float_emulated.h 114 0x1644 9
+me_vmult_float_emulated.h 114 0x1644 10
+me_vmult_float_emulated.h 115 0x1644 11
+me_vmult_float_emulated.h 115 0x1644 12
+me_vmult_float_emulated.h 115 0x1644 13
+me_vmult_float_emulated.h 108 0x1648
+me_vmult_float_emulated.h 108 0x1648 1
+me_vmult_float_emulated.h 109 0x1648 2
+me_vmult_float_emulated.h 110 0x1648 3
+me_vmult_float_emulated.h 110 0x1648 4
+me_vmult_float_emulated.h 111 0x1648 5
+me_vmult_float_emulated.h 111 0x1648 6
+me_vmult_float_emulated.h 111 0x1648 7
+me_vmult_float_emulated.h 113 0x1648 8 x
+me_vmult_float_emulated.h 115 0x1648 9
+me_vmult_float_emulated.h 115 0x1648 10
+me_vmult_float_emulated.h 115 0x1648 11
+me_vmult_float_emulated.h 108 0x1650
+me_vmult_float_emulated.h 108 0x1650 1
+me_vmult_float_emulated.h 109 0x1650 2
+me_vmult_float_emulated.h 110 0x1650 3
+me_vmult_float_emulated.h 110 0x1650 4
+me_vmult_float_emulated.h 111 0x1650 5
+me_vmult_float_emulated.h 111 0x1650 6
+me_vmult_float_emulated.h 111 0x1650 7
+me_vmult_float_emulated.h 113 0x165c
+me_vmult_float_emulated.h 114 0x165c 1 x
+me_vmult_float_emulated.h 114 0x165c 2 x
+me_vmult_float_emulated.h 115 0x1662 x
+me_vmult_float_emulated.h 115 0x1670
+me_vmult_float_emulated.h 115 0x1670 1
+me_vmult_float_emulated.h 115 0x1670 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1670 3
+add_reduce.hpp 322 0x1670 4
+add_reduce.hpp 322 0x1670 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 226 0x1680 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1280 0x168a
+vector.hpp 1280 0x168e x
+vector.hpp 1285 0x1692 x
+vector.hpp 1285 0x1692 1 x
+vector.hpp 1285 0x1698
+vector.hpp 1286 0x169c x
+vector.hpp 1285 0x16a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16a6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16aa x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16aa 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16ae
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ae 1 x
+accum.hpp 199 0x16ba x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x16ba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x16c2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 142 0x16c6 x
+vector.hpp 243 0x16c6 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16c6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ce x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16d2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16d6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16d6 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16de
+accum.hpp 151 0x16e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 243 0x16e6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 151 0x16e6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x16ea x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16ee x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16ee 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x16f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x16fa
+add_reduce.hpp 322 0x16fe x
+add_reduce.hpp 324 0x1702 x
+add_reduce.hpp 324 0x1702 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x170a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x170e x
+add_reduce.hpp 324 0x170e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1716 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x171a x
+add_reduce.hpp 322 0x171e x
+add_reduce.hpp 324 0x171e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1726 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x172a x
+add_reduce.hpp 322 0x172e x
+add_reduce.hpp 324 0x172e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1736 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x173a x
+add_reduce.hpp 322 0x173e x
+add_reduce.hpp 324 0x1742 x
+add_reduce.hpp 324 0x1742 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x174a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x174e x
+add_reduce.hpp 324 0x174e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1756 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x175a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x175e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1762 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1766 x
+vector.hpp 1288 0x1766 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x176c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1770 x
+vector.hpp 1287 0x1770 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1770 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1776 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 853 0x177a x
+vector.hpp 853 0x177e
+vector.hpp 142 0x1782 x
+vector.hpp 1413 0x1782 1 x
+vector.hpp 142 0x1786
+vector.hpp 1413 0x1786 1
+vector.hpp 142 0x178a
+vector.hpp 1413 0x178a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x178e x
+blend.hpp 170 0x1792
+blend.hpp 170 0x1796
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x179a
+me_vmult_float_emulated.h 108 0x179a 1
+me_vmult_float_emulated.h 108 0x179e
+me_vmult_float_emulated.h 108 0x179e 1
+me_vmult_float_emulated.h 109 0x179e 2
+me_vmult_float_emulated.h 110 0x179e 3
+me_vmult_float_emulated.h 110 0x179e 4
+me_vmult_float_emulated.h 111 0x179e 5
+me_vmult_float_emulated.h 111 0x179e 6
+me_vmult_float_emulated.h 111 0x179e 7
+me_vmult_float_emulated.h 108 0x17a2 x
+me_vmult_float_emulated.h 108 0x17a2 1 x
+me_vmult_float_emulated.h 109 0x17a2 2 x
+me_vmult_float_emulated.h 108 0x17aa
+me_vmult_float_emulated.h 108 0x17aa 1
+me_vmult_float_emulated.h 109 0x17aa 2
+me_vmult_float_emulated.h 110 0x17aa 3
+me_vmult_float_emulated.h 110 0x17aa 4
+me_vmult_float_emulated.h 111 0x17aa 5
+me_vmult_float_emulated.h 111 0x17aa 6
+me_vmult_float_emulated.h 111 0x17aa 7
+me_vmult_float_emulated.h 109 0x17ae
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17ae 1
+vector.hpp 1285 0x17ae 2 x
+vector.hpp 1289 0x17ae 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 120 0x17b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17b8 1
+vector.hpp 1289 0x17b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x17c0
+me_vmult_float_emulated.h 108 0x17c0 1
+me_vmult_float_emulated.h 109 0x17c0 2
+me_vmult_float_emulated.h 110 0x17c0 3
+me_vmult_float_emulated.h 110 0x17c0 4
+me_vmult_float_emulated.h 111 0x17c0 5
+me_vmult_float_emulated.h 111 0x17c0 6
+me_vmult_float_emulated.h 111 0x17c0 7
+me_vmult_float_emulated.h 124 0x17c0 8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1285 0x17c0 9 x
+vector.hpp 1289 0x17c0 10
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 125 0x17ca x
+me_vmult_float_emulated.h 109 0x17d2 x
+me_vmult_float_emulated.h 110 0x17d2 1 x
+me_vmult_float_emulated.h 110 0x17d2 2 x
+me_vmult_float_emulated.h 111 0x17d8 x
+me_vmult_float_emulated.h 111 0x17e6
+me_vmult_float_emulated.h 111 0x17e6 1
+me_vmult_float_emulated.h 111 0x17e6 2
+me_vmult_float_emulated.h 117 0x17ec x
+me_vmult_float_emulated.h 118 0x17f0 x
+me_vmult_float_emulated.h 119 0x17fa x
+me_vmult_float_emulated.h 117 0x17fe x
+me_vmult_float_emulated.h 118 0x1802 x
+me_vmult_float_emulated.h 118 0x1806
+me_vmult_float_emulated.h 122 0x1810 x
+me_vmult_float_emulated.h 118 0x1814 x
+me_vmult_float_emulated.h 119 0x1818 x
+me_vmult_float_emulated.h 119 0x181c
+me_vmult_float_emulated.h 121 0x1826 x
+me_vmult_float_emulated.h 119 0x182a x
+me_vmult_float_emulated.h 120 0x182e x
+me_vmult_float_emulated.h 120 0x1832
+me_vmult_float_emulated.h 123 0x183c x
+me_vmult_float_emulated.h 120 0x1840 x
+me_vmult_float_emulated.h 121 0x1844 x
+me_vmult_float_emulated.h 121 0x1848
+me_vmult_float_emulated.h 121 0x1854
+me_vmult_float_emulated.h 122 0x1858 x
+me_vmult_float_emulated.h 122 0x185c
+me_vmult_float_emulated.h 122 0x1868
+me_vmult_float_emulated.h 123 0x186c x
+me_vmult_float_emulated.h 123 0x1870
+me_vmult_float_emulated.h 123 0x187c
+me_vmult_float_emulated.h 124 0x1880 x
+me_vmult_float_emulated.h 124 0x1884
+me_vmult_float_emulated.h 124 0x1890
+me_vmult_float_emulated.h 125 0x1894 x
+me_vmult_float_emulated.h 125 0x1898
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x18a4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x18a4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1286 0x18aa
+vector.hpp 1289 0x18ae x
+vector.hpp 57 0x18b4 x
+vector.hpp 1292 0x18b4 1 x
+vector.hpp 57 0x18c0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x18c0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h:
+reduce_base_c8.h 352 0x18f0 x
+reduce_base_c8.h 352 0x18f4
+reduce_base_c8.h 352 0x18fe
+reduce_base_c8.h 353 0x1902 x
+reduce_base_c8.h 352 0x190e x
+reduce_base_c8.h 352 0x1912
+reduce_base_c8.h 420 0x1920
+reduce_base_c8.h 353 0x1928 x
+reduce_base_c8.h 420 0x192c x
+reduce_base_c8.h 420 0x1938
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1950
+blend.hpp 170 0x195a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1970
+reduce_mean_c8_impl.h 184 0x1974 x
+reduce_mean_c8_impl.h 184 0x1978
+reduce_mean_c8_impl.h 184 0x1988
+reduce_mean_c8_impl.h 184 0x198c
+reduce_mean_c8_impl.h 184 0x1990
+reduce_mean_c8_impl.h 200 0x1996
+reduce_mean_c8_impl.h 200 0x19b0 x
+reduce_mean_c8_impl.h 202 0x19b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x19ba
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 202 0x19ba 1 x
+reduce_mean_c8_impl.h 202 0x19c0
+reduce_mean_c8_impl.h 200 0x19ce x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x19d2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19d2 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 206 0x19d2 2 x
+reduce_mean_c8_impl.h 206 0x19d2 3
+reduce_mean_c8_impl.h 209 0x19d2 4
+reduce_mean_c8_impl.h 206 0x19de
+reduce_mean_c8_impl.h 206 0x19de 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x19ea x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19ea 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 209 0x19ea 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x19f0
+accum.hpp 199 0x19f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x19f6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 206 0x1a00 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1a10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x1a10 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 209 0x1a10 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 150 0x1a50 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp:
+add.hpp 28 0x1a60 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 199 0x1a70 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1a80
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1a80 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1a8a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1a8a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1a8a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1a94
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 150 0x1a9a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1aa0
+add_reduce.hpp 322 0x1aa4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1aa8
+me_vmult_float_emulated.h 108 0x1aa8 1
+me_vmult_float_emulated.h 109 0x1aa8 2
+me_vmult_float_emulated.h 110 0x1aa8 3
+me_vmult_float_emulated.h 110 0x1aa8 4
+me_vmult_float_emulated.h 111 0x1aa8 5
+me_vmult_float_emulated.h 111 0x1aa8 6
+me_vmult_float_emulated.h 111 0x1aa8 7
+me_vmult_float_emulated.h 112 0x1aa8 8
+me_vmult_float_emulated.h 112 0x1aa8 9
+me_vmult_float_emulated.h 113 0x1aa8 10
+me_vmult_float_emulated.h 114 0x1aa8 11
+me_vmult_float_emulated.h 114 0x1aa8 12
+me_vmult_float_emulated.h 115 0x1aa8 13
+me_vmult_float_emulated.h 115 0x1aa8 14
+me_vmult_float_emulated.h 115 0x1aa8 15
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1aa8 16
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1aa8 17 x
+accum.hpp 1108 0x1aa8 18
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1aa8 19 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1ab2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 80 0x1ab6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 112 0x1aba
+me_vmult_float_emulated.h 112 0x1aba 1
+me_vmult_float_emulated.h 113 0x1aba 2
+me_vmult_float_emulated.h 113 0x1ac0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1ac4 x
+add_reduce.hpp 322 0x1ac8 x
+add_reduce.hpp 324 0x1ac8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1ad0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1ad4
+me_vmult_float_emulated.h 108 0x1ad4 1
+me_vmult_float_emulated.h 109 0x1ad4 2
+me_vmult_float_emulated.h 110 0x1ad4 3
+me_vmult_float_emulated.h 110 0x1ad4 4
+me_vmult_float_emulated.h 111 0x1ad4 5
+me_vmult_float_emulated.h 111 0x1ad4 6
+me_vmult_float_emulated.h 111 0x1ad4 7
+me_vmult_float_emulated.h 113 0x1ad4 8
+me_vmult_float_emulated.h 114 0x1ad4 9
+me_vmult_float_emulated.h 114 0x1ad4 10
+me_vmult_float_emulated.h 115 0x1ad4 11
+me_vmult_float_emulated.h 115 0x1ad4 12
+me_vmult_float_emulated.h 115 0x1ad4 13
+me_vmult_float_emulated.h 112 0x1ada x
+me_vmult_float_emulated.h 112 0x1ada 1 x
+me_vmult_float_emulated.h 113 0x1ae0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1ae0 1 x
+add_reduce.hpp 322 0x1ae8 x
+add_reduce.hpp 324 0x1ae8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1af0 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1af4
+me_vmult_float_emulated.h 108 0x1af4 1
+me_vmult_float_emulated.h 109 0x1af4 2
+me_vmult_float_emulated.h 110 0x1af4 3
+me_vmult_float_emulated.h 110 0x1af4 4
+me_vmult_float_emulated.h 111 0x1af4 5
+me_vmult_float_emulated.h 111 0x1af4 6
+me_vmult_float_emulated.h 111 0x1af4 7
+me_vmult_float_emulated.h 115 0x1af4 8
+me_vmult_float_emulated.h 115 0x1af4 9
+me_vmult_float_emulated.h 115 0x1af4 10
+me_vmult_float_emulated.h 113 0x1afc x
+me_vmult_float_emulated.h 114 0x1afc 1 x
+me_vmult_float_emulated.h 114 0x1afc 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1b00 x
+add_reduce.hpp 322 0x1b04 x
+add_reduce.hpp 324 0x1b04 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 198 0x1b0c x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1b10
+me_vmult_float_emulated.h 108 0x1b10 1
+me_vmult_float_emulated.h 109 0x1b10 2
+me_vmult_float_emulated.h 110 0x1b10 3
+me_vmult_float_emulated.h 110 0x1b10 4
+me_vmult_float_emulated.h 111 0x1b10 5
+me_vmult_float_emulated.h 111 0x1b10 6
+me_vmult_float_emulated.h 111 0x1b10 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 324 0x1b1a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 915 0x1b1e x
+vector.hpp 856 0x1b24 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1b28 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1b2c
+me_vmult_float_emulated.h 108 0x1b2c 1
+me_vmult_float_emulated.h 109 0x1b30
+me_vmult_float_emulated.h 110 0x1b30 1
+me_vmult_float_emulated.h 110 0x1b30 2
+me_vmult_float_emulated.h 111 0x1b30 3
+me_vmult_float_emulated.h 111 0x1b30 4
+me_vmult_float_emulated.h 111 0x1b30 5
+me_vmult_float_emulated.h 108 0x1b34 x
+me_vmult_float_emulated.h 108 0x1b34 1 x
+me_vmult_float_emulated.h 111 0x1b34 2
+me_vmult_float_emulated.h 111 0x1b34 3
+me_vmult_float_emulated.h 111 0x1b34 4
+me_vmult_float_emulated.h 109 0x1b3e x
+me_vmult_float_emulated.h 124 0x1b42 x
+me_vmult_float_emulated.h 109 0x1b4e x
+me_vmult_float_emulated.h 110 0x1b4e 1 x
+me_vmult_float_emulated.h 110 0x1b4e 2 x
+me_vmult_float_emulated.h 115 0x1b52 x
+me_vmult_float_emulated.h 111 0x1b56 x
+me_vmult_float_emulated.h 115 0x1b62 x
+me_vmult_float_emulated.h 115 0x1b62 1 x
+me_vmult_float_emulated.h 115 0x1b62 2 x
+me_vmult_float_emulated.h 111 0x1b66 x
+me_vmult_float_emulated.h 111 0x1b66 1 x
+me_vmult_float_emulated.h 111 0x1b66 2 x
+me_vmult_float_emulated.h 117 0x1b6c x
+me_vmult_float_emulated.h 118 0x1b70 x
+me_vmult_float_emulated.h 119 0x1b7a x
+me_vmult_float_emulated.h 117 0x1b7e x
+me_vmult_float_emulated.h 118 0x1b82 x
+me_vmult_float_emulated.h 118 0x1b86
+me_vmult_float_emulated.h 120 0x1b90 x
+me_vmult_float_emulated.h 118 0x1b94 x
+me_vmult_float_emulated.h 119 0x1b98 x
+me_vmult_float_emulated.h 119 0x1b9c
+me_vmult_float_emulated.h 121 0x1ba6 x
+me_vmult_float_emulated.h 119 0x1baa x
+me_vmult_float_emulated.h 120 0x1bae x
+me_vmult_float_emulated.h 120 0x1bb2
+me_vmult_float_emulated.h 120 0x1bbe
+me_vmult_float_emulated.h 121 0x1bc2 x
+me_vmult_float_emulated.h 121 0x1bc6
+me_vmult_float_emulated.h 122 0x1bce x
+me_vmult_float_emulated.h 121 0x1bd4 x
+me_vmult_float_emulated.h 122 0x1bd8 x
+me_vmult_float_emulated.h 122 0x1bdc
+me_vmult_float_emulated.h 123 0x1be4 x
+me_vmult_float_emulated.h 122 0x1bea x
+me_vmult_float_emulated.h 123 0x1bee x
+me_vmult_float_emulated.h 123 0x1bf2
+me_vmult_float_emulated.h 123 0x1bfe
+me_vmult_float_emulated.h 124 0x1bfe 1 x
+me_vmult_float_emulated.h 124 0x1c06
+me_vmult_float_emulated.h 125 0x1c06 1 x
+me_vmult_float_emulated.h 125 0x1c14
+me_vmult_float_emulated.h 124 0x1c18 x
+me_vmult_float_emulated.h 125 0x1c2a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1c30 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1108 0x1c30 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1c40
+blend.hpp 170 0x1c4a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 184 0x1c80
+reduce_mean_c8_impl.h 184 0x1c84 x
+reduce_mean_c8_impl.h 184 0x1c88
+reduce_mean_c8_impl.h 184 0x1c9c
+reduce_mean_c8_impl.h 184 0x1ca6
+reduce_mean_c8_impl.h 184 0x1caa
+reduce_mean_c8_impl.h 184 0x1cba
+reduce_mean_c8_impl.h 184 0x1cbe
+reduce_mean_c8_impl.h 200 0x1cc4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1ce0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1cea
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1cea 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1cea 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1cf0
+blend.hpp 170 0x1d06
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d0c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d0c 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h:
+me_vmult_float_emulated.h 108 0x1d20
+me_vmult_float_emulated.h 108 0x1d20 1
+me_vmult_float_emulated.h 109 0x1d20 2
+me_vmult_float_emulated.h 110 0x1d20 3
+me_vmult_float_emulated.h 110 0x1d20 4
+me_vmult_float_emulated.h 111 0x1d20 5
+me_vmult_float_emulated.h 111 0x1d20 6
+me_vmult_float_emulated.h 111 0x1d20 7
+me_vmult_float_emulated.h 112 0x1d20 8
+me_vmult_float_emulated.h 112 0x1d20 9
+me_vmult_float_emulated.h 113 0x1d20 10
+me_vmult_float_emulated.h 114 0x1d20 11
+me_vmult_float_emulated.h 114 0x1d20 12
+me_vmult_float_emulated.h 115 0x1d20 13
+me_vmult_float_emulated.h 115 0x1d20 14
+me_vmult_float_emulated.h 115 0x1d20 15
+me_vmult_float_emulated.h 109 0x1d2a
+me_vmult_float_emulated.h 111 0x1d2a 1
+me_vmult_float_emulated.h 113 0x1d2a 2
+me_vmult_float_emulated.h 115 0x1d2a 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp:
+add_reduce.hpp 322 0x1d2a 4
+add_reduce.hpp 322 0x1d2a 5
+add_reduce.hpp 322 0x1d2a 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 163 0x1d2a 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d34
+vector.hpp 57 0x1d34 1
+vector.hpp 1139 0x1d34 2
+vector.hpp 1280 0x1d34 3
+vector.hpp 1287 0x1d34 4
+vector.hpp 1288 0x1d34 5
+vector.hpp 1292 0x1d34 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d34 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 226 0x1d34 8
+reduce_mean_c8_impl.h 268 0x1d34 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d3e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d3e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d3e 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp:
+blend.hpp 170 0x1d44
+blend.hpp 170 0x1d48
+blend.hpp 170 0x1d5a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 57 0x1d60
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h:
+reduce_mean_c8_impl.h 268 0x1d60 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 472 0x1d70
+superkernels.cpp 472 0x1d70 1 x
+superkernels.cpp 477 0x1d76
+superkernels.cpp 477 0x1d80 x
+superkernels.cpp 474 0x1d8a x
+superkernels.cpp 569 0x1d8a 1
+superkernels.cpp 474 0x1d94
+superkernels.cpp 477 0x1da4 x
+superkernels.cpp 477 0x1da4 1 x
+superkernels.cpp 474 0x1db6
+superkernels.cpp 474 0x1dbc x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1dc0
+io_buffer_main.h 218 0x1dc0 1
+io_buffer_main.h 324 0x1dc0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1dc0 3
+tile.hpp 74 0x1dc0 4
+tile.hpp 74 0x1dcc x
+tile.hpp 86 0x1dcc 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 483 0x1dd6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1dd6 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 480 0x1ddc x
+superkernels.cpp 480 0x1de2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1dec
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 481 0x1e00
+superkernels.cpp 487 0x1e00 1
+superkernels.cpp 481 0x1e0a
+superkernels.cpp 481 0x1e0a 1 x
+superkernels.cpp 481 0x1e14
+superkernels.cpp 481 0x1e14 1
+superkernels.cpp 481 0x1e1e
+superkernels.cpp 482 0x1e1e 1
+superkernels.cpp 481 0x1e28
+superkernels.cpp 482 0x1e28 1 x
+superkernels.cpp 481 0x1e32 x
+superkernels.cpp 483 0x1e32 1
+superkernels.cpp 483 0x1e38
+superkernels.cpp 487 0x1e3c
+superkernels.cpp 483 0x1e42
+superkernels.cpp 481 0x1e48
+superkernels.cpp 491 0x1e4c
+superkernels.cpp 481 0x1e52
+superkernels.cpp 482 0x1e52 1 x
+superkernels.cpp 481 0x1e5a x
+superkernels.cpp 481 0x1e60
+superkernels.cpp 483 0x1e64 x
+superkernels.cpp 487 0x1e68 x
+superkernels.cpp 487 0x1e6c
+superkernels.cpp 487 0x1e70
+superkernels.cpp 487 0x1e74
+superkernels.cpp 487 0x1e78
+superkernels.cpp 487 0x1e7c
+superkernels.cpp 483 0x1e80 x
+superkernels.cpp 487 0x1e84 x
+superkernels.cpp 487 0x1e88
+superkernels.cpp 487 0x1e8c
+superkernels.cpp 491 0x1e90 x
+superkernels.cpp 491 0x1ea0
+superkernels.cpp 491 0x1ea4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1eaa
+io_buffer_main.h 218 0x1eaa 1
+io_buffer_main.h 324 0x1eaa 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 491 0x1eb8
+superkernels.cpp 491 0x1ed6
+superkernels.cpp 491 0x1ef0
+superkernels.cpp 491 0x1f00
+superkernels.cpp 491 0x1f10
+superkernels.cpp 491 0x1f16
+superkernels.cpp 491 0x1f1a
+superkernels.cpp 491 0x1f20
+superkernels.cpp 491 0x1f30
+superkernels.cpp 491 0x1f30 1
+superkernels.cpp 491 0x1f30 2
+superkernels.cpp 491 0x1f3a
+superkernels.cpp 492 0x1f3a 1
+superkernels.cpp 492 0x1f3a 2
+superkernels.cpp 498 0x1f44
+superkernels.cpp 498 0x1f44 1
+superkernels.cpp 499 0x1f4e
+superkernels.cpp 505 0x1f54
+superkernels.cpp 508 0x1f54 1
+superkernels.cpp 511 0x1f54 2
+superkernels.cpp 491 0x1f5c
+superkernels.cpp 491 0x1f60
+superkernels.cpp 491 0x1f64
+superkernels.cpp 491 0x1f6a
+superkernels.cpp 492 0x1f72 x
+superkernels.cpp 494 0x1f82 x
+superkernels.cpp 495 0x1f86 x
+superkernels.cpp 496 0x1f8a x
+superkernels.cpp 498 0x1f8e x
+superkernels.cpp 498 0x1f9e
+superkernels.cpp 499 0x1fa2 x
+superkernels.cpp 499 0x1fb2
+superkernels.cpp 500 0x1fb6 x
+superkernels.cpp 500 0x1fc2
+superkernels.cpp 500 0x1fd0
+superkernels.cpp 505 0x1fe0
+superkernels.cpp 508 0x1fe0 1
+superkernels.cpp 511 0x1fe0 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1fea
+io_buffer_main.h 218 0x1fea 1
+io_buffer_main.h 324 0x1fea 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 505 0x1ff0 x
+superkernels.cpp 505 0x1ff0 1
+superkernels.cpp 505 0x2002
+superkernels.cpp 505 0x2006
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x200c x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 505 0x2018
+superkernels.cpp 505 0x201e x
+superkernels.cpp 505 0x201e 1
+superkernels.cpp 505 0x2028
+superkernels.cpp 505 0x2030
+superkernels.cpp 505 0x2036
+superkernels.cpp 505 0x203c
+superkernels.cpp 505 0x2040
+superkernels.cpp 505 0x2040 1
+superkernels.cpp 505 0x2046
+superkernels.cpp 505 0x2050
+superkernels.cpp 505 0x2050 1
+superkernels.cpp 505 0x2056
+superkernels.cpp 505 0x205a
+superkernels.cpp 505 0x205a 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x206a
+io_buffer_main.h 395 0x206a 1
+io_buffer_main.h 218 0x2070 x
+io_buffer_main.h 218 0x2074
+io_buffer_main.h 218 0x2078
+io_buffer_main.h 235 0x207e x
+io_buffer_main.h 218 0x208a x
+io_buffer_main.h 218 0x208a 1 x
+io_buffer_main.h 218 0x208e
+io_buffer_main.h 395 0x209a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 508 0x20a6 x
+superkernels.cpp 508 0x20b0
+superkernels.cpp 522 0x20b0 1
+superkernels.cpp 558 0x20b0 2
+superkernels.cpp 508 0x20be
+superkernels.cpp 508 0x20c2
+superkernels.cpp 508 0x20d2
+superkernels.cpp 508 0x20d8
+superkernels.cpp 508 0x20d8 1
+superkernels.cpp 508 0x20e2
+superkernels.cpp 508 0x20ea
+superkernels.cpp 508 0x20f0
+superkernels.cpp 508 0x20f6
+superkernels.cpp 508 0x20fa
+superkernels.cpp 508 0x20fa 1
+superkernels.cpp 508 0x2100
+superkernels.cpp 508 0x2110
+superkernels.cpp 508 0x2110 1
+superkernels.cpp 508 0x2116
+superkernels.cpp 508 0x211a
+superkernels.cpp 508 0x211a 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x212a
+io_buffer_main.h 395 0x212a 1
+io_buffer_main.h 218 0x2130 x
+io_buffer_main.h 218 0x2134
+io_buffer_main.h 218 0x2138
+io_buffer_main.h 235 0x213e x
+io_buffer_main.h 218 0x214a x
+io_buffer_main.h 218 0x214a 1 x
+io_buffer_main.h 218 0x214e
+io_buffer_main.h 395 0x215a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 511 0x2166 x
+superkernels.cpp 511 0x2166 1
+superkernels.cpp 511 0x217a
+superkernels.cpp 511 0x217e
+superkernels.cpp 511 0x2182
+superkernels.cpp 511 0x2188
+superkernels.cpp 511 0x2194
+superkernels.cpp 511 0x2198
+superkernels.cpp 511 0x2198 1
+superkernels.cpp 511 0x219e
+superkernels.cpp 511 0x21a6
+superkernels.cpp 511 0x21b0
+superkernels.cpp 511 0x21b4
+superkernels.cpp 511 0x21b4 1
+superkernels.cpp 511 0x21ba
+superkernels.cpp 511 0x21c0
+superkernels.cpp 511 0x21c0 1
+superkernels.cpp 511 0x21c6
+superkernels.cpp 511 0x21ca
+superkernels.cpp 511 0x21ca 1
+superkernels.cpp 516 0x21da
+superkernels.cpp 522 0x21da 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x21da 2 x
+io_buffer_main.h 395 0x21da 3
+io_buffer_main.h 218 0x21e4
+io_buffer_main.h 218 0x21e8
+io_buffer_main.h 235 0x21ee x
+io_buffer_main.h 218 0x21fa x
+io_buffer_main.h 218 0x21fa 1 x
+io_buffer_main.h 218 0x21fe
+io_buffer_main.h 395 0x220e x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x2226
+superkernels.cpp 522 0x2226 1
+superkernels.cpp 516 0x2240
+superkernels.cpp 522 0x2240 1
+superkernels.cpp 516 0x2250
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2250 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x225a x
+superkernels.cpp 522 0x225a 1
+superkernels.cpp 514 0x2264
+superkernels.cpp 522 0x2264 1 x
+superkernels.cpp 514 0x226e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2278 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 516 0x227c x
+superkernels.cpp 522 0x2280 x
+superkernels.cpp 522 0x2284
+superkernels.cpp 514 0x228a x
+superkernels.cpp 514 0x228e
+superkernels.cpp 516 0x2294 x
+superkernels.cpp 516 0x2298
+superkernels.cpp 522 0x2298 1
+superkernels.cpp 522 0x229e x
+superkernels.cpp 522 0x22a2
+superkernels.cpp 522 0x22b2
+superkernels.cpp 522 0x22b6
+superkernels.cpp 523 0x22bc
+superkernels.cpp 523 0x22ca x
+superkernels.cpp 523 0x22ca 1
+superkernels.cpp 523 0x22d4
+superkernels.cpp 524 0x22d4 1
+superkernels.cpp 524 0x22de
+superkernels.cpp 524 0x22de 1 x
+superkernels.cpp 523 0x22ee x
+superkernels.cpp 524 0x22f4 x
+superkernels.cpp 524 0x22f4 1 x
+superkernels.cpp 524 0x22fa
+superkernels.cpp 524 0x22fe
+superkernels.cpp 524 0x2302
+superkernels.cpp 524 0x2306
+superkernels.cpp 525 0x230a x
+superkernels.cpp 526 0x230e x
+superkernels.cpp 547 0x2312 x
+superkernels.cpp 525 0x2318
+superkernels.cpp 525 0x231e x
+superkernels.cpp 554 0x232e
+superkernels.cpp 558 0x232e 1
+superkernels.cpp 552 0x2338
+superkernels.cpp 554 0x2338 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2338 2
+io_buffer_main.h 327 0x2338 3
+io_buffer_main.h 425 0x2338 4
+io_buffer_main.h 425 0x2338 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x2342
+superkernels.cpp 555 0x2342 1
+superkernels.cpp 558 0x2342 2
+superkernels.cpp 559 0x2342 3
+superkernels.cpp 562 0x2342 4
+superkernels.cpp 563 0x2342 5
+superkernels.cpp 567 0x2342 6
+superkernels.cpp 554 0x2356
+superkernels.cpp 558 0x2356 1
+superkernels.cpp 552 0x2360
+superkernels.cpp 554 0x2360 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2360 2
+io_buffer_main.h 327 0x2360 3
+io_buffer_main.h 425 0x2360 4
+io_buffer_main.h 425 0x2360 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x236a
+superkernels.cpp 555 0x236a 1
+superkernels.cpp 558 0x236a 2
+superkernels.cpp 559 0x236a 3
+superkernels.cpp 562 0x236a 4
+superkernels.cpp 563 0x236a 5
+superkernels.cpp 567 0x236a 6
+superkernels.cpp 532 0x2380
+superkernels.cpp 533 0x2380 1
+superkernels.cpp 554 0x2380 2
+superkernels.cpp 555 0x2380 3
+superkernels.cpp 558 0x2380 4
+superkernels.cpp 559 0x2380 5
+superkernels.cpp 562 0x2380 6
+superkernels.cpp 563 0x2380 7
+superkernels.cpp 567 0x2380 8
+superkernels.cpp 532 0x238a x
+superkernels.cpp 532 0x238a 1
+superkernels.cpp 552 0x238a 2
+superkernels.cpp 532 0x2394
+superkernels.cpp 533 0x2394 1
+superkernels.cpp 533 0x239e x
+superkernels.cpp 554 0x239e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x239e 2
+io_buffer_main.h 327 0x239e 3
+io_buffer_main.h 425 0x239e 4
+io_buffer_main.h 425 0x239e 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 532 0x23ae x
+superkernels.cpp 533 0x23b4 x
+superkernels.cpp 533 0x23b4 1 x
+superkernels.cpp 533 0x23ba
+superkernels.cpp 533 0x23be
+superkernels.cpp 533 0x23c2
+superkernels.cpp 533 0x23c6
+superkernels.cpp 534 0x23ca x
+superkernels.cpp 535 0x23ce x
+superkernels.cpp 547 0x23d2 x
+superkernels.cpp 534 0x23d8
+superkernels.cpp 534 0x23de x
+superkernels.cpp 554 0x23e6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x23f0
+io_buffer_main.h 324 0x23f0 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 541 0x2410
+superkernels.cpp 541 0x2416 x
+superkernels.cpp 541 0x2416 1
+superkernels.cpp 541 0x2420
+superkernels.cpp 542 0x2420 1
+superkernels.cpp 542 0x242a x
+superkernels.cpp 541 0x2438 x
+superkernels.cpp 542 0x243e x
+superkernels.cpp 542 0x243e 1 x
+superkernels.cpp 542 0x2444
+superkernels.cpp 542 0x2448
+superkernels.cpp 542 0x244c
+superkernels.cpp 542 0x244c 1
+superkernels.cpp 542 0x2452
+superkernels.cpp 543 0x2456 x
+superkernels.cpp 544 0x245a x
+superkernels.cpp 547 0x245e x
+superkernels.cpp 543 0x2464
+superkernels.cpp 543 0x246a x
+superkernels.cpp 554 0x2480
+superkernels.cpp 558 0x2480 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2480 2
+io_buffer_main.h 125 0x2480 3 x
+io_buffer_main.h 324 0x2480 4
+io_buffer_main.h 327 0x2480 5
+io_buffer_main.h 327 0x2480 6
+io_buffer_main.h 425 0x2480 7
+io_buffer_main.h 425 0x2480 8
+io_buffer_main.h 125 0x248c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 287 0x2494 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x249a
+superkernels.cpp 554 0x249e
+superkernels.cpp 555 0x249e 1
+superkernels.cpp 558 0x249e 2
+superkernels.cpp 559 0x249e 3
+superkernels.cpp 562 0x249e 4
+superkernels.cpp 563 0x249e 5
+superkernels.cpp 567 0x249e 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h:
+pad_3d.h 287 0x24a6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x24b0
+superkernels.cpp 552 0x24b0 1
+superkernels.cpp 554 0x24ba
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24c0 x
+io_buffer_main.h 324 0x24c0 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x24c4 x
+superkernels.cpp 554 0x24e0 x
+superkernels.cpp 554 0x24f0
+superkernels.cpp 554 0x24f4
+superkernels.cpp 554 0x2504
+superkernels.cpp 555 0x2504 1
+superkernels.cpp 554 0x250a
+superkernels.cpp 554 0x250a 1
+superkernels.cpp 554 0x2514
+superkernels.cpp 554 0x251e
+superkernels.cpp 554 0x2526
+superkernels.cpp 554 0x252a
+superkernels.cpp 554 0x252a 1
+superkernels.cpp 554 0x2530
+superkernels.cpp 554 0x2530 1
+superkernels.cpp 554 0x2536
+superkernels.cpp 554 0x2540
+superkernels.cpp 554 0x2540 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2540 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x254a
+superkernels.cpp 554 0x254e
+superkernels.cpp 554 0x254e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2554
+io_buffer_main.h 327 0x2554 1
+io_buffer_main.h 327 0x2554 2
+io_buffer_main.h 425 0x2554 3
+io_buffer_main.h 425 0x2554 4
+io_buffer_main.h 425 0x2554 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 555 0x2560 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2560 1 x
+io_buffer_main.h 425 0x2572 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x2576
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2576 1 x
+io_buffer_main.h 327 0x2590
+io_buffer_main.h 327 0x2594
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x25a0
+superkernels.cpp 558 0x25b0 x
+superkernels.cpp 558 0x25c0
+superkernels.cpp 558 0x25ce
+superkernels.cpp 558 0x25d2
+superkernels.cpp 558 0x25d8
+superkernels.cpp 559 0x25d8 1
+superkernels.cpp 558 0x25de
+superkernels.cpp 558 0x25ea
+superkernels.cpp 558 0x25ee
+superkernels.cpp 558 0x25f8
+superkernels.cpp 558 0x2600
+superkernels.cpp 558 0x2604
+superkernels.cpp 558 0x2604 1
+superkernels.cpp 558 0x260a
+superkernels.cpp 558 0x260a 1
+superkernels.cpp 558 0x2610
+superkernels.cpp 558 0x2620
+superkernels.cpp 558 0x2620 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2620 2
+io_buffer_main.h 324 0x2620 3
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 558 0x262a
+superkernels.cpp 558 0x262e
+superkernels.cpp 558 0x262e 1
+superkernels.cpp 562 0x2634
+superkernels.cpp 559 0x2642 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2642 1 x
+io_buffer_main.h 425 0x2654 x
+io_buffer_main.h 327 0x2658 x
+io_buffer_main.h 327 0x2668
+io_buffer_main.h 327 0x266c
+io_buffer_main.h 324 0x2676
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 562 0x2690
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2690 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 562 0x26a0 x
+superkernels.cpp 562 0x26a0 1
+superkernels.cpp 562 0x26b2
+superkernels.cpp 562 0x26b6
+superkernels.cpp 562 0x26bc
+superkernels.cpp 562 0x26ca
+superkernels.cpp 562 0x26ca 1
+superkernels.cpp 562 0x26d4
+superkernels.cpp 562 0x26de
+superkernels.cpp 562 0x26e6
+superkernels.cpp 562 0x26ea
+superkernels.cpp 562 0x26ea 1
+superkernels.cpp 562 0x26f0
+superkernels.cpp 562 0x26f0 1
+superkernels.cpp 562 0x26f6
+superkernels.cpp 562 0x2700
+superkernels.cpp 562 0x2700 1
+superkernels.cpp 562 0x2706
+superkernels.cpp 562 0x270a
+superkernels.cpp 562 0x270a 1
+superkernels.cpp 563 0x2710
+superkernels.cpp 563 0x271e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x271e 1 x
+io_buffer_main.h 425 0x2730 x
+io_buffer_main.h 327 0x2734 x
+io_buffer_main.h 327 0x2744
+io_buffer_main.h 327 0x2748
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 566 0x2750
+superkernels.cpp 567 0x2750 1
+superkernels.cpp 566 0x2756 x
+superkernels.cpp 566 0x2756 1
+superkernels.cpp 566 0x2760
+superkernels.cpp 566 0x2770
+superkernels.cpp 566 0x2774
+superkernels.cpp 567 0x278a x
+superkernels.cpp 569 0x2790
+superkernels.cpp 569 0x279e x
+superkernels.cpp 569 0x27a6
+superkernels.cpp 554 0x27c0
+superkernels.cpp 555 0x27c0 1
+superkernels.cpp 558 0x27c0 2
+superkernels.cpp 559 0x27c0 3
+superkernels.cpp 562 0x27c0 4
+superkernels.cpp 563 0x27c0 5
+superkernels.cpp 567 0x27c0 6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x27c0 7
+io_buffer_main.h 324 0x27c0 8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 554 0x27cc
+superkernels.cpp 558 0x27cc 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x27cc 2
+io_buffer_main.h 327 0x27cc 3
+io_buffer_main.h 425 0x27cc 4
+io_buffer_main.h 425 0x27cc 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 552 0x27d2
+superkernels.cpp 554 0x27d8
+superkernels.cpp - 0x27d9
+
+
+superkernels.cpp:
+File name Line number Starting address View Stmt
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 29 0x930 x
+0_0_reloadable2.cc 31 0x930 1 x
+0_0_reloadable2.cc 29 0x936
+0_0_reloadable2.cc 31 0x93c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x93c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 17 0x944
+0_0_reloadable2.cc 31 0x944 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x956 x
+io_buffer_compiler.h 590 0x95a
+io_buffer_compiler.h 590 0x95e
+io_buffer_compiler.h 590 0x962
+io_buffer_compiler.h 590 0x966
+io_buffer_compiler.h 195 0x976 x
+io_buffer_compiler.h 195 0x976 1 x
+io_buffer_compiler.h 194 0x97a x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x97e
+io_buffer_main.h 410 0x988 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 17 0x992 x
+0_0_reloadable2.cc 18 0x996 x
+0_0_reloadable2.cc 19 0x99a x
+0_0_reloadable2.cc 16 0x99e x
+0_0_reloadable2.cc 38 0x9b0 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0x9b4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 605 0x9c2 x
+io_buffer_compiler.h 605 0x9c6
+io_buffer_compiler.h 606 0x9ca
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0x9ca 1
+io_buffer_main.h 440 0x9d8 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 41 0x9dc
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x9dc 1
+io_buffer_compiler.h 606 0x9e2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc:
+0_0_reloadable2.cc 41 0x9f0 x
+0_0_reloadable2.cc 41 0x9f8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x9fc x
+io_buffer_compiler.h 606 0xa00
+io_buffer_compiler.h 606 0xa04
+io_buffer_compiler.h - 0xa05
+
+
+CU: me_div.c:
+File name Line number Starting address View Stmt
+
+./me_div.c:[++]
+me_div.c 108 0x27f0
+me_div.c 108 0x27f0 1
+me_div.c 115 0x27f0 2 x
+me_div.c 108 0x27f6
+me_div.c 108 0x27fa
+me_div.c 108 0x27fe
+me_div.c 108 0x2802
+me_div.c 108 0x2806
+me_div.c 108 0x280a
+me_div.c 108 0x280e
+me_div.c 108 0x2812
+me_div.c 108 0x2816
+me_div.c 108 0x281a
+me_div.c 108 0x281e
+me_div.c 108 0x2822
+me_div.c 108 0x2826
+me_div.c 108 0x282a
+me_div.c 108 0x282e
+me_div.c 108 0x2832
+me_div.c 108 0x2836
+me_div.c 108 0x283a
+me_div.c 108 0x283e
+me_div.c 108 0x2842
+me_div.c 108 0x2846
+me_div.c 108 0x284a
+me_div.c 108 0x284e
+me_div.c 108 0x2852
+me_div.c 108 0x2856
+me_div.c 108 0x285a
+me_div.c 108 0x285e
+me_div.c 108 0x2862
+me_div.c 119 0x2866 x
+me_div.c 108 0x286a x
+me_div.c 108 0x286e
+me_div.c 108 0x2872
+me_div.c 108 0x2876
+me_div.c - 0x2877
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
+CU: softfloat-specialize:
+File name Line number Starting address View Stmt
+
+./softfloat-specialize:[++]
+softfloat-specialize 78 0x2880
+softfloat-specialize 137 0x2880 1
+softfloat-specialize 139 0x2880 2
+softfloat-specialize 143 0x2880 3 x
+softfloat-specialize 137 0x288a
+softfloat-specialize 139 0x288a 1
+softfloat-specialize 140 0x288a 2
+softfloat-specialize 141 0x288a 3
+softfloat-specialize 78 0x2894
+softfloat-specialize 137 0x2894 1
+softfloat-specialize 139 0x2894 2
+softfloat-specialize 140 0x2894 3 x
+softfloat-specialize 141 0x289e x
+softfloat-specialize 137 0x28a2 x
+softfloat-specialize 139 0x28a6 x
+softfloat-specialize 139 0x28aa
+softfloat-specialize 137 0x28ae x
+softfloat-specialize 137 0x28b2
+softfloat-specialize 78 0x28b6 x
+softfloat-specialize 78 0x28ba
+softfloat-specialize 143 0x28be x
+softfloat-specialize 137 0x28c2
+softfloat-specialize 139 0x28c2 1
+softfloat-specialize 139 0x28c8 x
+softfloat-specialize 139 0x28cc
+softfloat-specialize 137 0x28d0 x
+softfloat-specialize 137 0x28d4
+softfloat-specialize 143 0x28d8 x
+softfloat-specialize 137 0x28dc x
+softfloat-specialize 139 0x28e0 x
+softfloat-specialize 143 0x28e4 x
+softfloat-specialize 139 0x28e8 x
+softfloat-specialize 143 0x28ec x
+
+./softfloat.c:[++]
+softfloat.c 154 0x28f0 x
+softfloat.c 161 0x28f0 1
+softfloat.c 203 0x28f0 2
+softfloat.c 161 0x28fa x
+softfloat.c 171 0x28fa 1
+softfloat.c 174 0x28fa 2
+softfloat.c 178 0x28fa 3
+softfloat.c 194 0x28fa 4
+softfloat.c 162 0x290c x
+softfloat.c 164 0x290c 1 x
+softfloat.c 182 0x2912
+softfloat.c 185 0x2912 1
+softfloat.c 202 0x2912 2
+softfloat.c 165 0x291e
+softfloat.c 171 0x291e 1
+softfloat.c 171 0x291e 2
+softfloat.c 174 0x291e 3
+softfloat.c 174 0x291e 4
+softfloat.c 165 0x2928
+softfloat.c 171 0x2928 1 x
+softfloat.c 171 0x292e
+softfloat.c 174 0x2932 x
+softfloat.c 170 0x2936
+softfloat.c 174 0x2936 1
+softfloat.c 170 0x293c x
+softfloat.c 170 0x293c 1 x
+softfloat.c 165 0x2940 x
+softfloat.c 165 0x2944
+softfloat.c 179 0x2950
+softfloat.c 179 0x2950 1 x
+softfloat.c 180 0x2950 2
+softfloat.c 181 0x2950 3
+softfloat.c 179 0x2956
+softfloat.c 179 0x295a
+softfloat.c 178 0x2960 x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2964
+
+./softfloat.c:[++]
+softfloat.c 128 0x2964 1
+softfloat.c 128 0x2968 x
+softfloat.c 181 0x2970 x
+softfloat.c 182 0x2970 1 x
+softfloat.c 182 0x2970 2
+softfloat.c 182 0x297a
+softfloat.c 180 0x297e x
+softfloat.c 182 0x2982 x
+softfloat.c 181 0x2986 x
+softfloat.c 180 0x298a x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2990
+
+./softfloat.c:[++]
+softfloat.c 187 0x2990 1
+softfloat.c 192 0x2990 2
+softfloat.c 204 0x2990 3
+softfloat.c 204 0x2990 4
+softfloat.c 187 0x299c x
+softfloat.c 187 0x29a0
+softfloat.c 192 0x29b0 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x29b4 x
+softfloat-macros 46 0x29b4 1 x
+softfloat-macros 49 0x29c4
+softfloat-macros 50 0x29c4 1 x
+softfloat-macros 50 0x29ca
+softfloat-macros 50 0x29ce
+softfloat-macros 50 0x29d2
+softfloat-macros 49 0x29d6 x
+softfloat-macros 50 0x29da x
+softfloat-macros 53 0x29de x
+softfloat-macros 50 0x29e2 x
+softfloat-macros 49 0x29e6 x
+
+./softfloat.c:[++]
+softfloat.c 194 0x29f6 x
+softfloat.c 204 0x29fa
+softfloat.c 204 0x29fa 1
+softfloat.c 204 0x2a10
+softfloat.c 204 0x2a10 1
+softfloat.c 202 0x2a20 x
+softfloat.c 202 0x2a20 1
+softfloat.c 203 0x2a20 2 x
+softfloat.c 128 0x2a2a
+softfloat.c 203 0x2a2a 1
+softfloat.c 203 0x2a2a 2
+softfloat.c 203 0x2a34
+softfloat.c 202 0x2a38
+softfloat.c 203 0x2a3c
+softfloat.c 205 0x2a40 x
+softfloat.c 203 0x2a44 x
+softfloat.c 204 0x2a48 x
+softfloat.c 204 0x2a48 1 x
+softfloat.c 128 0x2a4c x
+softfloat.c 128 0x2a50
+softfloat.c 128 0x2a54
+softfloat.c 185 0x2a60 x
+softfloat.c 128 0x2a64
+softfloat.c 128 0x2a6a x
+softfloat.c 185 0x2a6e x
+softfloat.c 185 0x2a72
+softfloat.c 218 0x2a80 x
+softfloat.c 224 0x2a80 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 552 0x2a86 x
+
+./softfloat.c:[++]
+softfloat.c 223 0x2a8a x
+softfloat.c 224 0x2a8e x
+softfloat.c 224 0x2a92
+softfloat.c 477 0x2aa0 x
+softfloat.c 481 0x2aa0 1
+softfloat.c 481 0x2aa0 2 x
+softfloat.c 482 0x2ab0
+softfloat.c 482 0x2ab6 x
+softfloat.c 482 0x2aba
+softfloat.c 484 0x2aca
+softfloat.c 484 0x2aca 1 x
+softfloat.c 484 0x2ad4
+softfloat.c 484 0x2ad4 1
+softfloat.c 483 0x2ad8
+softfloat.c 483 0x2adc x
+softfloat.c 481 0x2af0 x
+softfloat.c 482 0x2b00 x
+softfloat.c 70 0x2b20
+softfloat.c 81 0x2b20 1
+softfloat.c 734 0x2b20 2 x
+softfloat.c 81 0x2b2a x
+softfloat.c 81 0x2b2e
+softfloat.c 81 0x2b32
+softfloat.c 81 0x2b36
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2b3a
+
+./softfloat.c:[++]
+softfloat.c 744 0x2b3a 1 x
+softfloat.c 747 0x2b3a 2
+softfloat.c 761 0x2b3a 3
+softfloat.c 772 0x2b3a 4
+softfloat.c 788 0x2b3a 5
+softfloat.c 747 0x2b40 x
+softfloat.c 747 0x2b44
+softfloat.c 70 0x2b4a x
+softfloat.c 70 0x2b4e
+softfloat.c 745 0x2b4e 1
+softfloat.c 746 0x2b4e 2
+softfloat.c 745 0x2b54 x
+softfloat.c 746 0x2b58 x
+softfloat.c 748 0x2b58 1
+softfloat.c 762 0x2b58 2
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2b5e
+
+./softfloat.c:[++]
+softfloat.c 128 0x2b5e 1
+softfloat.c 748 0x2b5e 2 x
+softfloat.c 761 0x2b64 x
+softfloat.c 761 0x2b68
+softfloat.c 128 0x2b6e x
+softfloat.c 762 0x2b7a x
+softfloat.c 762 0x2b7e
+softfloat.c 793 0x2b8e
+softfloat.c 787 0x2b92
+softfloat.c 767 0x2b96 x
+softfloat.c 766 0x2b9a x
+softfloat.c 772 0x2b9e x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2ba2 x
+softfloat-macros 46 0x2ba2 1 x
+
+./softfloat.c:[++]
+softfloat.c 770 0x2ba8
+softfloat.c 785 0x2ba8 1
+softfloat.c 770 0x2bae x
+softfloat.c 766 0x2bb2 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2bba
+softfloat-macros 50 0x2bba 1 x
+softfloat-macros 50 0x2bc0
+softfloat-macros 50 0x2bc4
+softfloat-macros 49 0x2bc8 x
+softfloat-macros 50 0x2bd2 x
+softfloat-macros 50 0x2bd6
+softfloat-macros 53 0x2bda x
+softfloat-macros 50 0x2bde x
+softfloat-macros 49 0x2be2 x
+
+./softfloat.c:[++]
+softfloat.c 748 0x2bf0 x
+softfloat.c 756 0x2bf6
+softfloat.c 785 0x2bf6 1
+softfloat.c 793 0x2c04
+softfloat.c 753 0x2c08 x
+softfloat.c 787 0x2c08 1
+softfloat.c 752 0x2c0e
+softfloat.c 752 0x2c0e 1
+softfloat.c 752 0x2c12 x
+softfloat.c 752 0x2c12 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2c16 x
+softfloat-macros 46 0x2c16 1 x
+
+./softfloat.c:[++]
+softfloat.c 756 0x2c1c x
+softfloat.c 752 0x2c20 x
+softfloat.c 752 0x2c20 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2c2a
+softfloat-macros 50 0x2c2a 1 x
+softfloat-macros 50 0x2c30
+softfloat-macros 50 0x2c34
+softfloat-macros 50 0x2c38
+softfloat-macros 49 0x2c3c x
+softfloat-macros 50 0x2c40 x
+softfloat-macros 53 0x2c44 x
+softfloat-macros 50 0x2c48 x
+softfloat-macros 49 0x2c4c x
+
+./softfloat.c:[++]
+softfloat.c 785 0x2c50 x
+softfloat.c 786 0x2c50 1
+softfloat.c 787 0x2c50 2 x
+softfloat.c 786 0x2c5a x
+softfloat.c 790 0x2c5a 1 x
+softfloat.c 786 0x2c60
+softfloat.c 788 0x2c64 x
+softfloat.c 788 0x2c68
+softfloat.c 788 0x2c6c
+softfloat.c 793 0x2c70 x
+softfloat.c 763 0x2c80 x
+softfloat.c 764 0x2c90 x
+softfloat.c 128 0x2c94
+softfloat.c 128 0x2c9a x
+softfloat.c 776 0x2cb0 x
+softfloat.c 780 0x2cc0 x
+softfloat.c 793 0x2cd0
+softfloat.c 781 0x2cda
+softfloat.c 781 0x2ce0 x
+softfloat.c 793 0x2ce0 1
+softfloat.c 781 0x2ce6
+softfloat.c 749 0x2cf0 x
+softfloat.c 750 0x2d00 x
+softfloat.c 763 0x2d10 x
+softfloat.c 777 0x2d20 x
+softfloat.c 777 0x2d24
+softfloat.c 778 0x2d34 x
+softfloat.c 780 0x2d50 x
+softfloat.c 780 0x2d50 1 x
+softfloat.c 780 0x2d56
+softfloat.c 780 0x2d5a
+softfloat.c 128 0x2d5e x
+softfloat.c 749 0x2d70 x
+softfloat.c 777 0x2d80 x
+softfloat.c 70 0x2d90
+softfloat.c 81 0x2d90 1
+softfloat.c 805 0x2d90 2 x
+softfloat.c 81 0x2d9a x
+softfloat.c 81 0x2d9e
+softfloat.c 70 0x2da2 x
+softfloat.c 81 0x2da6 x
+softfloat.c 81 0x2daa
+softfloat.c 70 0x2dae x
+softfloat.c 816 0x2dae 1
+softfloat.c 817 0x2dae 2
+softfloat.c 816 0x2db4 x
+
+./softfloat-macros:[++]
+softfloat-macros 50 0x2db8
+
+./softfloat.c:[++]
+softfloat.c 815 0x2db8 1 x
+softfloat.c 818 0x2db8 2
+softfloat.c 819 0x2db8 3
+softfloat.c 843 0x2db8 4
+softfloat.c 818 0x2dbe x
+softfloat.c 818 0x2dc2
+softfloat.c 817 0x2dc8 x
+softfloat.c 833 0x2dcc
+softfloat.c 851 0x2dcc 1
+softfloat.c 859 0x2dcc 2
+softfloat.c 862 0x2dcc 3
+softfloat.c 851 0x2dd6 x
+softfloat.c 862 0x2dda x
+softfloat.c 859 0x2dde x
+softfloat.c 819 0x2de2 x
+softfloat.c 819 0x2de6
+softfloat.c 825 0x2dec
+softfloat.c 835 0x2dec 1
+softfloat.c 835 0x2df0 x
+softfloat.c 833 0x2dfa x
+softfloat.c 833 0x2dfe
+softfloat.c 868 0x2e0e
+softfloat.c 838 0x2e12 x
+softfloat.c 837 0x2e16 x
+softfloat.c 843 0x2e1a x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2e1e x
+softfloat-macros 46 0x2e1e 1 x
+
+./softfloat.c:[++]
+softfloat.c 837 0x2e24 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2e30
+softfloat-macros 50 0x2e30 1 x
+softfloat-macros 50 0x2e30 2
+softfloat-macros 50 0x2e3a
+softfloat-macros 50 0x2e3e
+softfloat-macros 53 0x2e42 x
+softfloat-macros 49 0x2e46 x
+softfloat-macros 50 0x2e4a x
+softfloat-macros 50 0x2e4e
+softfloat-macros 50 0x2e52
+softfloat-macros 49 0x2e56 x
+
+./softfloat.c:[++]
+softfloat.c 846 0x2e66 x
+softfloat.c 851 0x2e80 x
+softfloat.c 867 0x2e90
+softfloat.c 868 0x2e94
+softfloat.c 855 0x2e98
+softfloat.c 855 0x2e98 1
+softfloat.c 867 0x2e9c
+softfloat.c 856 0x2ea0 x
+softfloat.c 855 0x2ea4 x
+softfloat.c 855 0x2ea4 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 46 0x2ea8 x
+softfloat-macros 46 0x2ea8 1 x
+
+./softfloat.c:[++]
+softfloat.c 855 0x2eae x
+softfloat.c 855 0x2eae 1 x
+
+./softfloat-macros:[++]
+softfloat-macros 49 0x2eba
+softfloat-macros 50 0x2eba 1 x
+softfloat-macros 50 0x2eba 2
+softfloat-macros 50 0x2ec4
+softfloat-macros 50 0x2ec8
+softfloat-macros 50 0x2ecc
+softfloat-macros 49 0x2ed0 x
+softfloat-macros 50 0x2ed4 x
+softfloat-macros 53 0x2ed8 x
+softfloat-macros 50 0x2edc x
+softfloat-macros 49 0x2ee0 x
+
+./softfloat.c:[++]
+softfloat.c 864 0x2ef0 x
+softfloat.c 868 0x2f00 x
+softfloat.c 867 0x2f06 x
+softfloat.c 820 0x2f20 x
+softfloat.c 829 0x2f30 x
+softfloat.c 829 0x2f34
+softfloat.c 825 0x2f3a x
+softfloat.c 825 0x2f3e
+softfloat.c 825 0x2f42
+softfloat.c 830 0x2f4a x
+softfloat.c 830 0x2f4e
+softfloat.c 128 0x2f5e
+softfloat.c 831 0x2f62
+softfloat.c 831 0x2f68 x
+softfloat.c 831 0x2f70
+softfloat.c 831 0x2f74
+softfloat.c 831 0x2f7c
+softfloat.c 128 0x2f80 x
+softfloat.c 834 0x2f90 x
+softfloat.c 128 0x2fa0
+softfloat.c 835 0x2fa4 x
+softfloat.c 128 0x2fa8 x
+softfloat.c 128 0x2fac
+softfloat.c 128 0x2fb2
+softfloat.c 852 0x2fc0 x
+softfloat.c 853 0x2fd0 x
+softfloat.c 821 0x2fe0 x
+softfloat.c 821 0x2fe4
+softfloat.c 823 0x2ff4 x
+softfloat.c 868 0x3016
+softfloat.c 864 0x301a
+softfloat.c 846 0x3036
+softfloat.c 867 0x303a
+softfloat.c 868 0x303e
+softfloat.c 834 0x3050 x
+softfloat.c 852 0x3060 x
+softfloat.c 821 0x3070 x
+softfloat.c 92 0x3080
+softfloat.c 878 0x3080 1 x
+softfloat.c 92 0x3084 x
+softfloat.c 92 0x3088
+softfloat.c 884 0x308c x
+softfloat.c 884 0x3090
+softfloat.c 888 0x30a0 x
+softfloat.c 885 0x30b0 x
+softfloat.c - 0x30b1
+
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.bcf
@@ -0,0 +1,16 @@
+_reserved DMb 0x0 0x40000
+
+_reserved PM 0x0 0x930 //reserved for main elf
+
+_entry_point _Z13kernelWrapperPPvjjjj
+_symbol _Z13kernelWrapperPPvjjjj 0x930
+
+_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers
+_reserved DMb 0x7ba80 0x40 //reserved for sync buffer
+_stack DM_stack 0x7bac0 0x940 //stack for core
+_reserved DMb 0x7c400 0x40 //reserved for main elf heap
+//space for synopsys compiler at 0x7c440 0x880//heap
+_reserved DMb 0x40000 0x3b280
+
+_reserved DMb 0x7ccc0 0x3340
+_reserved DMb 0x80000 0x80000 // And everything else the core can't see
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.prx
new file mode 100644
index 0000000000000000000000000000000000000000..400b4874e5a4e7fe440c17f993c7251dcaf49ed6
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/scripts/3_3_reloadable14.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/src/3_3_reloadable14.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/src/3_3_reloadable14.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable14/src/3_3_reloadable14.cc
@@ -0,0 +1,41 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+
+// Declare Kernel objects and external arrays
+
+
+void _b961_wrapper(void* args[])
+{
+ superkernel_reduce_mean_c8(
+ *reinterpret_cast*>(args[0]),
+ *reinterpret_cast(args[2]),
+ *reinterpret_cast*>(args[1]));
+}
+
+using UniformKernelFunc = void (*)(void **);
+
+static UniformKernelFunc g_uniformKernelFuncs[1] = {
+ _b961_wrapper
+};
+
+__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut)
+{
+ uint32 idx = 0;
+ reinterpret_cast(args[idx])->acquire(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+
+ (*(g_uniformKernelFuncs[kernelId]))(args);
+
+ idx = 0;
+ reinterpret_cast(args[idx])->release(numSyncIn > 0);
+ idx += (numSyncIn > 0) ? 1 : 0;
+ idx += numAsyncIn;
+}
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.calltree
new file mode 100644
index 0000000000000000000000000000000000000000..a9aa937024e08d6db65ac17b5f174a0a1241e359
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.calltree
@@ -0,0 +1,108 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:20 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+
+// Release: ipp V-2024.06-TGT-241219
+
+_Z13kernelWrapperPPvjjjj
+ _Z13_b896_wrapperPPv (referenced text)
+ _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ _Z13_b901_wrapperPPv (referenced text)
+ _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _Z13_b906_wrapperPPv (referenced text)
+ _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ _Z13_b881_wrapperPPv (referenced text)
+ _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ _Z13_b891_wrapperPPv (referenced text)
+ _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ _Z13_b924_wrapperPPv (referenced text)
+ _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+ _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (*)
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*)
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*)
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (*)
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*)
+ _Z13_b919_wrapperPPv (referenced text)
+ _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _ZN12me_primitive10udiv_dstepEjjRjS0_
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Call tree stack and functions sizes:
+
+stack stack stack call func func function name
+ desc level level desc
+----- ----- ----- ----- ----- ----- --------------------------------------------------------------
+ 64 320 0 0 390 13150 _Z13kernelWrapperPPvjjjj
+ 0 192 1 1 36 4714 _Z13_b896_wrapperPPv
+ 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+ 0 192 1 1 32 1252 _Z13_b901_wrapperPPv
+ 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+ 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+ 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+ 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ 0 64 1 1 32 862 _Z13_b906_wrapperPPv
+ 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+ 0 256 1 1 32 1394 _Z13_b881_wrapperPPv
+ 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+ 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+ 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ 0 128 1 1 36 1092 _Z13_b891_wrapperPPv
+ 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+ 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+ 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ 0 192 1 1 40 6494 _Z13_b924_wrapperPPv
+ 64 192 1 2 1126 6454 _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+ 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*)
+ 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+ 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*)
+ 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+ 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+ 0 192 1 1 36 2050 _Z13_b919_wrapperPPv
+ 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+ 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_
+ 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+
+Maximum call level : 5
+Maximum stack level: 4
+Maximum stack size : 320
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmic2
new file mode 100644
index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmic2
@@ -0,0 +1,19187 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable5.cc" 94 first
+.src_ref 0 "0_0_reloadable5.cc" 96 60 first
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 94
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "00000000" // /* MW 5 */
+ 6942 "10101100" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "00000000" // /* MW 5 */
+ 7224 "11001100" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "10000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11100000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11100000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11009 "01011000" // /* MW 9 */
+ 11010 "00000000" // /* MW 8 */
+ 11011 "00001000" // /* MW 7 */
+ 11012 "00001011" // /* MW 6 */
+ 11013 "00100000" // /* MW 5 */
+ 11014 "00001000" // /* MW 4 */
+ 11015 "11010000" // /* MW 3 */
+ 11016 "10000101" // /* MW 2 */
+ 11017 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "00000001" // /* MW 3 */
+ 11020 "10000000" // /* MW 2 */
+ 11021 "00010111" // /* MW 1 */
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11025 "00000000" // /* MW 1 */
+ 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11027 "00000000" // /* MW 1 */
+ 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11029 "00000000" // /* MW 1 */
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11033 "00101001" // /* MW 3 */
+ 11034 "00011100" // /* MW 2 */
+ 11035 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11037 "00101110" // /* MW 3 */
+ 11038 "00011100" // /* MW 2 */
+ 11039 "00000001" // /* MW 1 */
+ 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11041 "00000000" // /* MW 1 */
+ 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11043 "00000000" // /* MW 1 */
+ 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11045 "00000000" // /* MW 1 */
+ 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11047 "00000000" // /* MW 1 */
+ 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11051 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11053 "00101001" // /* MW 3 */
+ 11054 "00011100" // /* MW 2 */
+ 11055 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11057 "00101110" // /* MW 3 */
+ 11058 "00000100" // /* MW 2 */
+ 11059 "00000001" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+ 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11063 "00000000" // /* MW 1 */
+ 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11065 "00000000" // /* MW 1 */
+ 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11067 "00000000" // /* MW 1 */
+ 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11069 "00000000" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00101001" // /* MW 3 */
+ 11074 "00011100" // /* MW 2 */
+ 11075 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11077 "01110110" // /* MW 3 */
+ 11078 "00010100" // /* MW 2 */
+ 11079 "00000001" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+ 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11083 "00000000" // /* MW 1 */
+ 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11085 "00000000" // /* MW 1 */
+ 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11087 "00000000" // /* MW 1 */
+ 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11089 "00000000" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "01110001" // /* MW 3 */
+ 11094 "01001100" // /* MW 2 */
+ 11095 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11097 "00010111" // /* MW 3 */
+ 11098 "00000100" // /* MW 2 */
+ 11099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11101 "00000000" // /* MW 3 */
+ 11102 "00101000" // /* MW 2 */
+ 11103 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11105 "00000000" // /* MW 5 */
+ 11106 "10111110" // /* MW 4 */
+ 11107 "11110000" // /* MW 3 */
+ 11108 "00000000" // /* MW 2 */
+ 11109 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11111 "00010100" // /* MW 3 */
+ 11112 "11000010" // /* MW 2 */
+ 11113 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11115 "00100111" // /* MW 3 */
+ 11116 "01110110" // /* MW 2 */
+ 11117 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "10000010" // /* MW 3 */
+ 11120 "00000001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11123 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11137 "00000001" // /* MW 5 */
+ 11138 "00000000" // /* MW 4 */
+ 11139 "00000000" // /* MW 3 */
+ 11140 "00001000" // /* MW 2 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11143 "00111101" // /* MW 3 */
+ 11144 "11111000" // /* MW 2 */
+ 11145 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11147 "00000001" // /* MW 5 */
+ 11148 "00000000" // /* MW 4 */
+ 11149 "10000000" // /* MW 3 */
+ 11150 "00010101" // /* MW 2 */
+ 11151 "00000000" // /* MW 1 */
+.delay_slot
+ 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11153 "10100000" // /* MW 3 */
+ 11154 "00010111" // /* MW 2 */
+ 11155 "00011000" // /* MW 1 */
+.delay_slot
+ 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11157 "00010101" // /* MW 3 */
+ 11158 "11111100" // /* MW 2 */
+ 11159 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.delay_slot
+ 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11161 "11000000" // /* MW 3 */
+ 11162 "11010000" // /* MW 2 */
+ 11163 "00011011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "00001000" // /* MW 9 */
+ 11170 "11000100" // /* MW 8 */
+ 11171 "00110011" // /* MW 7 */
+ 11172 "01101000" // /* MW 6 */
+ 11173 "00000000" // /* MW 5 */
+ 11174 "00000001" // /* MW 4 */
+ 11175 "00100000" // /* MW 3 */
+ 11176 "00000111" // /* MW 2 */
+ 11177 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11179 "01011000" // /* MW 9 */
+ 11180 "11111101" // /* MW 8 */
+ 11181 "00000111" // /* MW 7 */
+ 11182 "00001000" // /* MW 6 */
+ 11183 "10000000" // /* MW 5 */
+ 11184 "00000001" // /* MW 4 */
+ 11185 "10000000" // /* MW 3 */
+ 11186 "11100010" // /* MW 2 */
+ 11187 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11189 "00000001" // /* MW 9 */
+ 11190 "10100000" // /* MW 8 */
+ 11191 "00000111" // /* MW 7 */
+ 11192 "10000000" // /* MW 6 */
+ 11193 "00010001" // /* MW 5 */
+ 11194 "00001010" // /* MW 4 */
+ 11195 "00100000" // /* MW 3 */
+ 11196 "10111110" // /* MW 2 */
+ 11197 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11199 "01001010" // /* MW 3 */
+ 11200 "00000110" // /* MW 2 */
+ 11201 "00000000" // /* MW 1 */
+ 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11205 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11207 "00010111" // /* MW 3 */
+ 11208 "00000010" // /* MW 2 */
+ 11209 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11211 "00000000" // /* MW 3 */
+ 11212 "00101000" // /* MW 2 */
+ 11213 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11215 "00000101" // /* MW 3 */
+ 11216 "00100010" // /* MW 2 */
+ 11217 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "00000000" // /* MW 3 */
+ 11222 "11111000" // /* MW 2 */
+ 11223 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "00100111" // /* MW 3 */
+ 11226 "01110111" // /* MW 2 */
+ 11227 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11229 "10000010" // /* MW 3 */
+ 11230 "00100001" // /* MW 2 */
+ 11231 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11233 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_shared.h" 227 first
+.src_ref 3 "elementwise_binary_shared.h" 232 8 first
+.tail_call
+.function_start
+ 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 11249 "00000000" // /* MW 5 */
+ 11250 "00000000" // /* MW 4 */
+ 11251 "00101000" // /* MW 3 */
+ 11252 "00010011" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11259 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 11263 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11265 "00000001" // /* MW 5 */
+ 11266 "00100001" // /* MW 4 */
+ 11267 "00000000" // /* MW 3 */
+ 11268 "00000000" // /* MW 2 */
+ 11269 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "11000000" // /* MW 3 */
+ 11272 "01010000" // /* MW 2 */
+ 11273 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11275 "10010000" // /* MW 3 */
+ 11276 "01100000" // /* MW 2 */
+ 11277 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "00010001" // /* MW 3 */
+ 11280 "00000100" // /* MW 2 */
+ 11281 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010001" // /* MW 3 */
+ 11284 "00010100" // /* MW 2 */
+ 11285 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11287 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00101110" // /* MW 3 */
+ 11298 "00011100" // /* MW 2 */
+ 11299 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11301 "00000001" // /* MW 5 */
+ 11302 "00000000" // /* MW 4 */
+ 11303 "00000000" // /* MW 3 */
+ 11304 "00001000" // /* MW 2 */
+ 11305 "00000000" // /* MW 1 */
+ 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11307 "00111101" // /* MW 3 */
+ 11308 "11111100" // /* MW 2 */
+ 11309 "00001111" // /* MW 1 */
+ 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11311 "00000000" // /* MW 1 */
+ 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11313 "00000000" // /* MW 1 */
+ 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11315 "00000000" // /* MW 1 */
+ 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11317 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11319 "00101001" // /* MW 3 */
+ 11320 "00011100" // /* MW 2 */
+ 11321 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11323 "00101110" // /* MW 3 */
+ 11324 "00011100" // /* MW 2 */
+ 11325 "00000001" // /* MW 1 */
+ 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11327 "00000000" // /* MW 1 */
+ 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11329 "00000000" // /* MW 1 */
+ 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11331 "00000000" // /* MW 1 */
+ 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11333 "00000000" // /* MW 1 */
+ 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11335 "00000000" // /* MW 1 */
+ 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11337 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11339 "00101001" // /* MW 3 */
+ 11340 "00011100" // /* MW 2 */
+ 11341 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11343 "00101110" // /* MW 3 */
+ 11344 "00000100" // /* MW 2 */
+ 11345 "00000001" // /* MW 1 */
+ 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11347 "00000000" // /* MW 1 */
+ 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11349 "00000000" // /* MW 1 */
+ 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11351 "00000000" // /* MW 1 */
+ 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11353 "00000000" // /* MW 1 */
+ 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11355 "00000000" // /* MW 1 */
+ 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11357 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11359 "00101001" // /* MW 3 */
+ 11360 "00011100" // /* MW 2 */
+ 11361 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11363 "00101110" // /* MW 3 */
+ 11364 "00010100" // /* MW 2 */
+ 11365 "00000001" // /* MW 1 */
+ 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11367 "00000000" // /* MW 1 */
+ 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11369 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */
+ 11371 "00000001" // /* MW 5 */
+ 11372 "00000000" // /* MW 4 */
+ 11373 "00000000" // /* MW 3 */
+ 11374 "00010110" // /* MW 2 */
+ 11375 "00000000" // /* MW 1 */
+.delay_slot
+ 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11377 "10011101" // /* MW 3 */
+ 11378 "11111011" // /* MW 2 */
+ 11379 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11385 "00101001" // /* MW 3 */
+ 11386 "11011100" // /* MW 2 */
+ 11387 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11389 "11000000" // /* MW 3 */
+ 11390 "01100000" // /* MW 2 */
+ 11391 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11393 "00111001" // /* MW 3 */
+ 11394 "11111100" // /* MW 2 */
+ 11395 "00000111" // /* MW 1 */
+ 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11397 "00000000" // /* MW 1 */
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11407 "10011001" // /* MW 3 */
+ 11408 "11111011" // /* MW 2 */
+ 11409 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11411 "00000000" // /* MW 3 */
+ 11412 "00101000" // /* MW 2 */
+ 11413 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11419 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11421 "00000001" // /* MW 3 */
+ 11422 "00100000" // /* MW 2 */
+ 11423 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11425 "01110001" // /* MW 9 */
+ 11426 "00000000" // /* MW 8 */
+ 11427 "00000000" // /* MW 7 */
+ 11428 "00000000" // /* MW 6 */
+ 11429 "11111110" // /* MW 5 */
+ 11430 "00111111" // /* MW 4 */
+ 11431 "00110000" // /* MW 3 */
+ 11432 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11433 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11441 "00010000" // /* MW 9 */
+ 11442 "10110000" // /* MW 8 */
+ 11443 "01111110" // /* MW 7 */
+ 11444 "00001000" // /* MW 6 */
+ 11445 "00000000" // /* MW 5 */
+ 11446 "00000000" // /* MW 4 */
+ 11447 "10000000" // /* MW 3 */
+ 11448 "00000000" // /* MW 2 */
+ 11449 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11451 "00010000" // /* MW 9 */
+ 11452 "10111000" // /* MW 8 */
+ 11453 "10111110" // /* MW 7 */
+ 11454 "00001001" // /* MW 6 */
+ 11455 "00000000" // /* MW 5 */
+ 11456 "00000000" // /* MW 4 */
+ 11457 "11010000" // /* MW 3 */
+ 11458 "00001110" // /* MW 2 */
+ 11459 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11461 "01011000" // /* MW 9 */
+ 11462 "00111100" // /* MW 8 */
+ 11463 "00001011" // /* MW 7 */
+ 11464 "01001000" // /* MW 6 */
+ 11465 "00010111" // /* MW 5 */
+ 11466 "00111110" // /* MW 4 */
+ 11467 "11010000" // /* MW 3 */
+ 11468 "10010000" // /* MW 2 */
+ 11469 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11471 "00010000" // /* MW 9 */
+ 11472 "00110100" // /* MW 8 */
+ 11473 "00110010" // /* MW 7 */
+ 11474 "11110010" // /* MW 6 */
+ 11475 "00000001" // /* MW 5 */
+ 11476 "00000000" // /* MW 4 */
+ 11477 "11010000" // /* MW 3 */
+ 11478 "10000000" // /* MW 2 */
+ 11479 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11481 "01000010" // /* MW 3 */
+ 11482 "00000100" // /* MW 2 */
+ 11483 "00000100" // /* MW 1 */
+ 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11485 "00000000" // /* MW 1 */
+ 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11487 "00000000" // /* MW 1 */
+ 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11489 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11491 "00011101" // /* MW 3 */
+ 11492 "11000010" // /* MW 2 */
+ 11493 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11111001" // /* MW 5 */
+ 11496 "11100001" // /* MW 4 */
+ 11497 "10001010" // /* MW 3 */
+ 11498 "00001110" // /* MW 2 */
+ 11499 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "01101000" // /* MW 5 */
+ 11502 "01010000" // /* MW 4 */
+ 11503 "01110000" // /* MW 3 */
+ 11504 "00010011" // /* MW 2 */
+ 11505 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11507 "10000000" // /* MW 7 */
+ 11508 "10111010" // /* MW 6 */
+ 11509 "11101000" // /* MW 5 */
+ 11510 "01010000" // /* MW 4 */
+ 11511 "01110000" // /* MW 3 */
+ 11512 "00011011" // /* MW 2 */
+ 11513 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11515 "01101000" // /* MW 5 */
+ 11516 "01010000" // /* MW 4 */
+ 11517 "01110000" // /* MW 3 */
+ 11518 "00010011" // /* MW 2 */
+ 11519 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11521 "11101000" // /* MW 5 */
+ 11522 "01010000" // /* MW 4 */
+ 11523 "01110000" // /* MW 3 */
+ 11524 "00011011" // /* MW 2 */
+ 11525 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11527 "10011011" // /* MW 3 */
+ 11528 "00001000" // /* MW 2 */
+ 11529 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11531 "01101000" // /* MW 5 */
+ 11532 "01010000" // /* MW 4 */
+ 11533 "01110000" // /* MW 3 */
+ 11534 "00011011" // /* MW 2 */
+ 11535 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11537 "11101000" // /* MW 5 */
+ 11538 "01010000" // /* MW 4 */
+ 11539 "01110000" // /* MW 3 */
+ 11540 "00010011" // /* MW 2 */
+ 11541 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11543 "01000001" // /* MW 9 */
+ 11544 "11100010" // /* MW 8 */
+ 11545 "00000000" // /* MW 7 */
+ 11546 "00011101" // /* MW 6 */
+ 11547 "00110100" // /* MW 5 */
+ 11548 "00101000" // /* MW 4 */
+ 11549 "01110000" // /* MW 3 */
+ 11550 "00011011" // /* MW 2 */
+ 11551 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11553 "01100001" // /* MW 9 */
+ 11554 "11100000" // /* MW 8 */
+ 11555 "00000001" // /* MW 7 */
+ 11556 "00011101" // /* MW 6 */
+ 11557 "01110100" // /* MW 5 */
+ 11558 "00101000" // /* MW 4 */
+ 11559 "01110000" // /* MW 3 */
+ 11560 "00010011" // /* MW 2 */
+ 11561 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "01000001" // /* MW 9 */
+ 11564 "11100010" // /* MW 8 */
+ 11565 "00000000" // /* MW 7 */
+ 11566 "00011101" // /* MW 6 */
+ 11567 "00110100" // /* MW 5 */
+ 11568 "00101000" // /* MW 4 */
+ 11569 "01110000" // /* MW 3 */
+ 11570 "00011011" // /* MW 2 */
+ 11571 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "01100001" // /* MW 9 */
+ 11574 "11100000" // /* MW 8 */
+ 11575 "00000001" // /* MW 7 */
+ 11576 "00011101" // /* MW 6 */
+ 11577 "01110100" // /* MW 5 */
+ 11578 "00101000" // /* MW 4 */
+ 11579 "01110000" // /* MW 3 */
+ 11580 "00010011" // /* MW 2 */
+ 11581 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "01000001" // /* MW 9 */
+ 11584 "11100010" // /* MW 8 */
+ 11585 "00000000" // /* MW 7 */
+ 11586 "00011101" // /* MW 6 */
+ 11587 "00110100" // /* MW 5 */
+ 11588 "00101000" // /* MW 4 */
+ 11589 "01110000" // /* MW 3 */
+ 11590 "00011011" // /* MW 2 */
+ 11591 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11593 "01100001" // /* MW 9 */
+ 11594 "11100000" // /* MW 8 */
+ 11595 "00000001" // /* MW 7 */
+ 11596 "00011101" // /* MW 6 */
+ 11597 "01110100" // /* MW 5 */
+ 11598 "00101000" // /* MW 4 */
+ 11599 "01110000" // /* MW 3 */
+ 11600 "00010011" // /* MW 2 */
+ 11601 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11603 "01000001" // /* MW 13 */
+ 11604 "11100010" // /* MW 12 */
+ 11605 "00000000" // /* MW 11 */
+ 11606 "10001100" // /* MW 10 */
+ 11607 "01110000" // /* MW 9 */
+ 11608 "00001000" // /* MW 8 */
+ 11609 "00000000" // /* MW 7 */
+ 11610 "00000000" // /* MW 6 */
+ 11611 "01101000" // /* MW 5 */
+ 11612 "01010000" // /* MW 4 */
+ 11613 "01110000" // /* MW 3 */
+ 11614 "00011011" // /* MW 2 */
+ 11615 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11617 "00000011" // /* MW 15 */
+ 11618 "00001111" // /* MW 14 */
+ 11619 "01111000" // /* MW 13 */
+ 11620 "10100101" // /* MW 12 */
+ 11621 "00000001" // /* MW 11 */
+ 11622 "00000000" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "10100011" // /* MW 7 */
+ 11626 "00011100" // /* MW 6 */
+ 11627 "11101010" // /* MW 5 */
+ 11628 "01010000" // /* MW 4 */
+ 11629 "01110000" // /* MW 3 */
+ 11630 "00010011" // /* MW 2 */
+ 11631 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11633 "00010010" // /* MW 15 */
+ 11634 "00000111" // /* MW 14 */
+ 11635 "01111000" // /* MW 13 */
+ 11636 "10100101" // /* MW 12 */
+ 11637 "00000001" // /* MW 11 */
+ 11638 "00000000" // /* MW 10 */
+ 11639 "00000000" // /* MW 9 */
+ 11640 "00000000" // /* MW 8 */
+ 11641 "00100011" // /* MW 7 */
+ 11642 "00011100" // /* MW 6 */
+ 11643 "01101010" // /* MW 5 */
+ 11644 "01010000" // /* MW 4 */
+ 11645 "01110000" // /* MW 3 */
+ 11646 "00011011" // /* MW 2 */
+ 11647 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11649 "01100001" // /* MW 7 */
+ 11650 "11100000" // /* MW 6 */
+ 11651 "00000001" // /* MW 5 */
+ 11652 "00000010" // /* MW 4 */
+ 11653 "01100000" // /* MW 3 */
+ 11654 "10010100" // /* MW 2 */
+ 11655 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11657 "01000001" // /* MW 7 */
+ 11658 "11100010" // /* MW 6 */
+ 11659 "00000000" // /* MW 5 */
+ 11660 "00000010" // /* MW 4 */
+ 11661 "01100000" // /* MW 3 */
+ 11662 "10000100" // /* MW 2 */
+ 11663 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11665 "01100001" // /* MW 7 */
+ 11666 "11100000" // /* MW 6 */
+ 11667 "00000001" // /* MW 5 */
+ 11668 "00000010" // /* MW 4 */
+ 11669 "01100000" // /* MW 3 */
+ 11670 "10010100" // /* MW 2 */
+ 11671 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11673 "01000001" // /* MW 7 */
+ 11674 "11100010" // /* MW 6 */
+ 11675 "00000000" // /* MW 5 */
+ 11676 "00000010" // /* MW 4 */
+ 11677 "01100000" // /* MW 3 */
+ 11678 "10000100" // /* MW 2 */
+ 11679 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11681 "01100001" // /* MW 7 */
+ 11682 "11100000" // /* MW 6 */
+ 11683 "00000001" // /* MW 5 */
+ 11684 "00000010" // /* MW 4 */
+ 11685 "01100000" // /* MW 3 */
+ 11686 "10010100" // /* MW 2 */
+ 11687 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11689 "01000001" // /* MW 7 */
+ 11690 "11100010" // /* MW 6 */
+ 11691 "00000000" // /* MW 5 */
+ 11692 "00000010" // /* MW 4 */
+ 11693 "01100000" // /* MW 3 */
+ 11694 "10000100" // /* MW 2 */
+ 11695 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11697 "01100001" // /* MW 7 */
+ 11698 "11100000" // /* MW 6 */
+ 11699 "00000001" // /* MW 5 */
+ 11700 "00000010" // /* MW 4 */
+ 11701 "01100000" // /* MW 3 */
+ 11702 "10010100" // /* MW 2 */
+ 11703 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11705 "00100011" // /* MW 3 */
+ 11706 "00011100" // /* MW 2 */
+ 11707 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11709 "00000000" // /* MW 5 */
+ 11710 "01010000" // /* MW 4 */
+ 11711 "01100000" // /* MW 3 */
+ 11712 "10010100" // /* MW 2 */
+ 11713 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00100011" // /* MW 3 */
+ 11716 "00011100" // /* MW 2 */
+ 11717 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "10100011" // /* MW 3 */
+ 11720 "00011100" // /* MW 2 */
+ 11721 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "00100011" // /* MW 3 */
+ 11724 "00011100" // /* MW 2 */
+ 11725 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11727 "10100011" // /* MW 3 */
+ 11728 "00011100" // /* MW 2 */
+ 11729 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11731 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11745 "10000000" // /* MW 5 */
+ 11746 "11001000" // /* MW 4 */
+ 11747 "11001000" // /* MW 3 */
+ 11748 "00000111" // /* MW 2 */
+ 11749 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11751 "11000001" // /* MW 5 */
+ 11752 "10110101" // /* MW 4 */
+ 11753 "11011000" // /* MW 3 */
+ 11754 "11000010" // /* MW 2 */
+ 11755 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11757 "00000001" // /* MW 5 */
+ 11758 "00000000" // /* MW 4 */
+ 11759 "00000000" // /* MW 3 */
+ 11760 "00001000" // /* MW 2 */
+ 11761 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11763 "01111001" // /* MW 9 */
+ 11764 "01100000" // /* MW 8 */
+ 11765 "11001010" // /* MW 7 */
+ 11766 "10000001" // /* MW 6 */
+ 11767 "00010100" // /* MW 5 */
+ 11768 "00100011" // /* MW 4 */
+ 11769 "10110000" // /* MW 3 */
+ 11770 "00111010" // /* MW 2 */
+ 11771 "11111111" // /* MW 1 */
+ 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11773 "01110000" // /* MW 7 */
+ 11774 "11010000" // /* MW 6 */
+ 11775 "00001011" // /* MW 5 */
+ 11776 "00000000" // /* MW 4 */
+ 11777 "10110000" // /* MW 3 */
+ 11778 "10000011" // /* MW 2 */
+ 11779 "11111101" // /* MW 1 */
+ 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11781 "00010101" // /* MW 3 */
+ 11782 "11111100" // /* MW 2 */
+ 11783 "00001111" // /* MW 1 */
+ 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11785 "00111101" // /* MW 3 */
+ 11786 "11110000" // /* MW 2 */
+ 11787 "00001111" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */
+ 11791 "00000001" // /* MW 5 */
+ 11792 "01000000" // /* MW 4 */
+ 11793 "01010000" // /* MW 3 */
+ 11794 "00010111" // /* MW 2 */
+ 11795 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "11111011" // /* MW 3 */
+ 11798 "01100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11801 "10100000" // /* MW 5 */
+ 11802 "11001000" // /* MW 4 */
+ 11803 "11000100" // /* MW 3 */
+ 11804 "00000111" // /* MW 2 */
+ 11805 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11807 "01110000" // /* MW 7 */
+ 11808 "01100000" // /* MW 6 */
+ 11809 "00110111" // /* MW 5 */
+ 11810 "00000001" // /* MW 4 */
+ 11811 "00110000" // /* MW 3 */
+ 11812 "11000110" // /* MW 2 */
+ 11813 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11815 "11000000" // /* MW 3 */
+ 11816 "11010110" // /* MW 2 */
+ 11817 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11819 "00010001" // /* MW 9 */
+ 11820 "11000000" // /* MW 8 */
+ 11821 "10110010" // /* MW 7 */
+ 11822 "11110011" // /* MW 6 */
+ 11823 "00000001" // /* MW 5 */
+ 11824 "00000000" // /* MW 4 */
+ 11825 "10110000" // /* MW 3 */
+ 11826 "10100011" // /* MW 2 */
+ 11827 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11829 "00010001" // /* MW 9 */
+ 11830 "00110100" // /* MW 8 */
+ 11831 "00110010" // /* MW 7 */
+ 11832 "11110001" // /* MW 6 */
+ 11833 "00000001" // /* MW 5 */
+ 11834 "00000000" // /* MW 4 */
+ 11835 "01100000" // /* MW 3 */
+ 11836 "10010001" // /* MW 2 */
+ 11837 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11839 "00010000" // /* MW 9 */
+ 11840 "00110010" // /* MW 8 */
+ 11841 "00110010" // /* MW 7 */
+ 11842 "11110001" // /* MW 6 */
+ 11843 "00000001" // /* MW 5 */
+ 11844 "00000000" // /* MW 4 */
+ 11845 "11100000" // /* MW 3 */
+ 11846 "11000000" // /* MW 2 */
+ 11847 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11849 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 11851 "00000001" // /* MW 5 */
+ 11852 "00000000" // /* MW 4 */
+ 11853 "00010000" // /* MW 3 */
+ 11854 "00010110" // /* MW 2 */
+ 11855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "00110001" // /* MW 3 */
+ 11862 "00100000" // /* MW 2 */
+ 11863 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11865 "00000101" // /* MW 3 */
+ 11866 "00100000" // /* MW 2 */
+ 11867 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11869 "00010001" // /* MW 3 */
+ 11870 "00000110" // /* MW 2 */
+ 11871 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11873 "00010000" // /* MW 9 */
+ 11874 "00101000" // /* MW 8 */
+ 11875 "10110010" // /* MW 7 */
+ 11876 "11110000" // /* MW 6 */
+ 11877 "00000001" // /* MW 5 */
+ 11878 "00000000" // /* MW 4 */
+ 11879 "11010000" // /* MW 3 */
+ 11880 "11000010" // /* MW 2 */
+ 11881 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11883 "00010000" // /* MW 9 */
+ 11884 "00101010" // /* MW 8 */
+ 11885 "10110010" // /* MW 7 */
+ 11886 "11110001" // /* MW 6 */
+ 11887 "00000001" // /* MW 5 */
+ 11888 "00000000" // /* MW 4 */
+ 11889 "11010000" // /* MW 3 */
+ 11890 "11000110" // /* MW 2 */
+ 11891 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11893 "00010000" // /* MW 9 */
+ 11894 "00101110" // /* MW 8 */
+ 11895 "10110010" // /* MW 7 */
+ 11896 "11110000" // /* MW 6 */
+ 11897 "00000001" // /* MW 5 */
+ 11898 "00000000" // /* MW 4 */
+ 11899 "01010000" // /* MW 3 */
+ 11900 "11001011" // /* MW 2 */
+ 11901 "11101010" // /* MW 1 */
+ 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11903 "00000000" // /* MW 1 */
+ 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11905 "00000000" // /* MW 1 */
+ 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11907 "00000000" // /* MW 1 */
+ 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000000" // /* MW 4 */
+ 11911 "01011000" // /* MW 3 */
+ 11912 "00010111" // /* MW 2 */
+ 11913 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11000100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "00001111" // /* MW 3 */
+ 11922 "01100001" // /* MW 2 */
+ 11923 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "01010001" // /* MW 3 */
+ 11926 "00000110" // /* MW 2 */
+ 11927 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "00010001" // /* MW 3 */
+ 11930 "00000110" // /* MW 2 */
+ 11931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00010001" // /* MW 3 */
+ 11934 "00000110" // /* MW 2 */
+ 11935 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11937 "10101000" // /* MW 5 */
+ 11938 "11001000" // /* MW 4 */
+ 11939 "11000110" // /* MW 3 */
+ 11940 "00000111" // /* MW 2 */
+ 11941 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11943 "00010000" // /* MW 9 */
+ 11944 "00101110" // /* MW 8 */
+ 11945 "10110010" // /* MW 7 */
+ 11946 "11110000" // /* MW 6 */
+ 11947 "00000001" // /* MW 5 */
+ 11948 "00000000" // /* MW 4 */
+ 11949 "11110000" // /* MW 3 */
+ 11950 "00101100" // /* MW 2 */
+ 11951 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "10000110" // /* MW 3 */
+ 11954 "01100111" // /* MW 2 */
+ 11955 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11957 "00010000" // /* MW 9 */
+ 11958 "00100000" // /* MW 8 */
+ 11959 "00110010" // /* MW 7 */
+ 11960 "11110001" // /* MW 6 */
+ 11961 "00000001" // /* MW 5 */
+ 11962 "00000000" // /* MW 4 */
+ 11963 "11010000" // /* MW 3 */
+ 11964 "11101110" // /* MW 2 */
+ 11965 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11967 "00010110" // /* MW 3 */
+ 11968 "11111110" // /* MW 2 */
+ 11969 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11971 "00110110" // /* MW 3 */
+ 11972 "11111110" // /* MW 2 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "01010110" // /* MW 3 */
+ 11976 "00000110" // /* MW 2 */
+ 11977 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11979 "01110110" // /* MW 3 */
+ 11980 "01000110" // /* MW 2 */
+ 11981 "00000000" // /* MW 1 */
+ 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11983 "00000000" // /* MW 1 */
+ 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11985 "00000000" // /* MW 1 */
+ 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11987 "00000000" // /* MW 1 */
+ 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11989 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11991 "00000010" // /* MW 3 */
+ 11992 "01100001" // /* MW 2 */
+ 11993 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11995 "00001110" // /* MW 5 */
+ 11996 "01000000" // /* MW 4 */
+ 11997 "00111001" // /* MW 3 */
+ 11998 "11000010" // /* MW 2 */
+ 11999 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12001 "00010001" // /* MW 3 */
+ 12002 "00000110" // /* MW 2 */
+ 12003 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "11111101" // /* MW 3 */
+ 12006 "11100000" // /* MW 2 */
+ 12007 "00010111" // /* MW 1 */
+ 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12009 "00000000" // /* MW 1 */
+ 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12011 "00000000" // /* MW 1 */
+ 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12013 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12015 "00001000" // /* MW 3 */
+ 12016 "11010011" // /* MW 2 */
+ 12017 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12019 "00000110" // /* MW 3 */
+ 12020 "01100111" // /* MW 2 */
+ 12021 "00011010" // /* MW 1 */
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12027 "01110110" // /* MW 3 */
+ 12028 "11111111" // /* MW 2 */
+ 12029 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12031 "00110110" // /* MW 3 */
+ 12032 "11111110" // /* MW 2 */
+ 12033 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12035 "01010110" // /* MW 3 */
+ 12036 "11111110" // /* MW 2 */
+ 12037 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "01110110" // /* MW 3 */
+ 12040 "01010110" // /* MW 2 */
+ 12041 "00000010" // /* MW 1 */
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+ 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12049 "00000000" // /* MW 1 */
+ 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "00010010" // /* MW 3 */
+ 12054 "10100011" // /* MW 2 */
+ 12055 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "00110001" // /* MW 3 */
+ 12058 "00000110" // /* MW 2 */
+ 12059 "00001010" // /* MW 1 */
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+ 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12063 "00000000" // /* MW 1 */
+ 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12065 "00000000" // /* MW 1 */
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12069 "00001000" // /* MW 3 */
+ 12070 "11010011" // /* MW 2 */
+ 12071 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12073 "01111001" // /* MW 9 */
+ 12074 "01100000" // /* MW 8 */
+ 12075 "11001110" // /* MW 7 */
+ 12076 "00101001" // /* MW 6 */
+ 12077 "00000000" // /* MW 5 */
+ 12078 "00000001" // /* MW 4 */
+ 12079 "01100000" // /* MW 3 */
+ 12080 "00010001" // /* MW 2 */
+ 12081 "11010001" // /* MW 1 */
+ 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12083 "00000000" // /* MW 1 */
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12087 "00011001" // /* MW 3 */
+ 12088 "11101110" // /* MW 2 */
+ 12089 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12091 "00111011" // /* MW 5 */
+ 12092 "11011000" // /* MW 4 */
+ 12093 "11011111" // /* MW 3 */
+ 12094 "11000110" // /* MW 2 */
+ 12095 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12097 "10000001" // /* MW 5 */
+ 12098 "11011101" // /* MW 4 */
+ 12099 "11010110" // /* MW 3 */
+ 12100 "11010010" // /* MW 2 */
+ 12101 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12103 "01010110" // /* MW 3 */
+ 12104 "01001110" // /* MW 2 */
+ 12105 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00011110" // /* MW 3 */
+ 12108 "01011101" // /* MW 2 */
+ 12109 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12111 "11000000" // /* MW 3 */
+ 12112 "01100000" // /* MW 2 */
+ 12113 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12115 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12117 "01110110" // /* MW 3 */
+ 12118 "00000110" // /* MW 2 */
+ 12119 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 12123 "00000001" // /* MW 5 */
+ 12124 "00000000" // /* MW 4 */
+ 12125 "01011000" // /* MW 3 */
+ 12126 "00010110" // /* MW 2 */
+ 12127 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12129 "11000000" // /* MW 3 */
+ 12130 "11010100" // /* MW 2 */
+ 12131 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00001101" // /* MW 3 */
+ 12134 "01100011" // /* MW 2 */
+ 12135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "00001101" // /* MW 3 */
+ 12138 "00100001" // /* MW 2 */
+ 12139 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12141 "01000001" // /* MW 3 */
+ 12142 "01101001" // /* MW 2 */
+ 12143 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12145 "00000000" // /* MW 15 */
+ 12146 "00000000" // /* MW 14 */
+ 12147 "10101000" // /* MW 13 */
+ 12148 "11100010" // /* MW 12 */
+ 12149 "00110100" // /* MW 11 */
+ 12150 "00000000" // /* MW 10 */
+ 12151 "00000000" // /* MW 9 */
+ 12152 "00000000" // /* MW 8 */
+ 12153 "01011011" // /* MW 7 */
+ 12154 "00000001" // /* MW 6 */
+ 12155 "00100000" // /* MW 5 */
+ 12156 "00000000" // /* MW 4 */
+ 12157 "11110000" // /* MW 3 */
+ 12158 "00101100" // /* MW 2 */
+ 12159 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12161 "01111000" // /* MW 9 */
+ 12162 "11010000" // /* MW 8 */
+ 12163 "10110011" // /* MW 7 */
+ 12164 "00101000" // /* MW 6 */
+ 12165 "00000000" // /* MW 5 */
+ 12166 "00000001" // /* MW 4 */
+ 12167 "11010000" // /* MW 3 */
+ 12168 "11000110" // /* MW 2 */
+ 12169 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12171 "11000000" // /* MW 5 */
+ 12172 "11001000" // /* MW 4 */
+ 12173 "11001100" // /* MW 3 */
+ 12174 "00000111" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+ 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12177 "00000000" // /* MW 1 */
+ 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12179 "00000000" // /* MW 1 */
+ 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12181 "00000000" // /* MW 1 */
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12187 "00001000" // /* MW 3 */
+ 12188 "01010001" // /* MW 2 */
+ 12189 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12191 "00110110" // /* MW 3 */
+ 12192 "11110110" // /* MW 2 */
+ 12193 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12195 "00011001" // /* MW 3 */
+ 12196 "11101101" // /* MW 2 */
+ 12197 "00000111" // /* MW 1 */
+ 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12199 "00000000" // /* MW 1 */
+ 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12201 "00000000" // /* MW 1 */
+ 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12203 "00000000" // /* MW 1 */
+ 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12205 "00000000" // /* MW 1 */
+ 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12207 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12209 "00010001" // /* MW 3 */
+ 12210 "00100011" // /* MW 2 */
+ 12211 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12213 "01100011" // /* MW 5 */
+ 12214 "11101100" // /* MW 4 */
+ 12215 "11010011" // /* MW 3 */
+ 12216 "11000110" // /* MW 2 */
+ 12217 "01001010" // /* MW 1 */
+ 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12219 "00000000" // /* MW 1 */
+ 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12221 "00000000" // /* MW 1 */
+ 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12223 "00000000" // /* MW 1 */
+ 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12225 "00000000" // /* MW 1 */
+ 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12227 "00000000" // /* MW 1 */
+ 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12229 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12231 "00001000" // /* MW 3 */
+ 12232 "01010001" // /* MW 2 */
+ 12233 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12235 "00010000" // /* MW 9 */
+ 12236 "00100000" // /* MW 8 */
+ 12237 "10110010" // /* MW 7 */
+ 12238 "11110000" // /* MW 6 */
+ 12239 "00000001" // /* MW 5 */
+ 12240 "00000000" // /* MW 4 */
+ 12241 "11010000" // /* MW 3 */
+ 12242 "11001110" // /* MW 2 */
+ 12243 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "01010110" // /* MW 3 */
+ 12246 "00000110" // /* MW 2 */
+ 12247 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12249 "00110110" // /* MW 3 */
+ 12250 "00000110" // /* MW 2 */
+ 12251 "00000001" // /* MW 1 */
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+ 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12257 "00000000" // /* MW 1 */
+ 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12259 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12261 "00110001" // /* MW 3 */
+ 12262 "00100001" // /* MW 2 */
+ 12263 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12265 "00010001" // /* MW 3 */
+ 12266 "11100110" // /* MW 2 */
+ 12267 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12269 "00101000" // /* MW 3 */
+ 12270 "01100001" // /* MW 2 */
+ 12271 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12273 "00000001" // /* MW 5 */
+ 12274 "01000000" // /* MW 4 */
+ 12275 "00001000" // /* MW 3 */
+ 12276 "00011000" // /* MW 2 */
+ 12277 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12285 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12289 "00000001" // /* MW 3 */
+ 12290 "00100000" // /* MW 2 */
+ 12291 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "11000001" // /* MW 11 */
+ 12294 "00001000" // /* MW 10 */
+ 12295 "10000011" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12305 "00111001" // /* MW 3 */
+ 12306 "11110000" // /* MW 2 */
+ 12307 "00000111" // /* MW 1 */
+ 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12309 "11110001" // /* MW 3 */
+ 12310 "11111101" // /* MW 2 */
+ 12311 "00000111" // /* MW 1 */
+ 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12313 "10011001" // /* MW 3 */
+ 12314 "11110111" // /* MW 2 */
+ 12315 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12317 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12319 "11010001" // /* MW 3 */
+ 12320 "11111001" // /* MW 2 */
+ 12321 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12327 "00000000" // /* MW 3 */
+ 12328 "00101000" // /* MW 2 */
+ 12329 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12331 "00001011" // /* MW 3 */
+ 12332 "10001110" // /* MW 2 */
+ 12333 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12335 "00000001" // /* MW 5 */
+ 12336 "00000000" // /* MW 4 */
+ 12337 "00000000" // /* MW 3 */
+ 12338 "11111000" // /* MW 2 */
+ 12339 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12343 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12345 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12353 "00010000" // /* MW 9 */
+ 12354 "11100000" // /* MW 8 */
+ 12355 "10110011" // /* MW 7 */
+ 12356 "11110000" // /* MW 6 */
+ 12357 "00000001" // /* MW 5 */
+ 12358 "00000000" // /* MW 4 */
+ 12359 "11010000" // /* MW 3 */
+ 12360 "10000101" // /* MW 2 */
+ 12361 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12363 "01011000" // /* MW 9 */
+ 12364 "00000000" // /* MW 8 */
+ 12365 "00001000" // /* MW 7 */
+ 12366 "01001011" // /* MW 6 */
+ 12367 "00000000" // /* MW 5 */
+ 12368 "00000001" // /* MW 4 */
+ 12369 "11010000" // /* MW 3 */
+ 12370 "10000001" // /* MW 2 */
+ 12371 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12373 "00000001" // /* MW 5 */
+ 12374 "00000000" // /* MW 4 */
+ 12375 "00000000" // /* MW 3 */
+ 12376 "00001000" // /* MW 2 */
+ 12377 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12379 "00010001" // /* MW 9 */
+ 12380 "11100000" // /* MW 8 */
+ 12381 "10110011" // /* MW 7 */
+ 12382 "11110011" // /* MW 6 */
+ 12383 "00000001" // /* MW 5 */
+ 12384 "00000000" // /* MW 4 */
+ 12385 "10110000" // /* MW 3 */
+ 12386 "11110011" // /* MW 2 */
+ 12387 "11111110" // /* MW 1 */
+ 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12389 "00111101" // /* MW 3 */
+ 12390 "11111100" // /* MW 2 */
+ 12391 "00001111" // /* MW 1 */
+ 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12393 "11110101" // /* MW 3 */
+ 12394 "11111001" // /* MW 2 */
+ 12395 "00001111" // /* MW 1 */
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12399 "00101001" // /* MW 3 */
+ 12400 "00011100" // /* MW 2 */
+ 12401 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12403 "00001001" // /* MW 3 */
+ 12404 "00011100" // /* MW 2 */
+ 12405 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12407 "00101110" // /* MW 3 */
+ 12408 "00000100" // /* MW 2 */
+ 12409 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12411 "00001110" // /* MW 3 */
+ 12412 "00010100" // /* MW 2 */
+ 12413 "00000000" // /* MW 1 */
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12415 "00000000" // /* MW 1 */
+ 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12417 "00000000" // /* MW 1 */
+ 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12419 "00000000" // /* MW 1 */
+ 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12421 "00000000" // /* MW 1 */
+ 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12423 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00101001" // /* MW 3 */
+ 12426 "00000100" // /* MW 2 */
+ 12427 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00001001" // /* MW 3 */
+ 12430 "00010100" // /* MW 2 */
+ 12431 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12433 "00101010" // /* MW 3 */
+ 12434 "01011110" // /* MW 2 */
+ 12435 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12437 "01001010" // /* MW 3 */
+ 12438 "11101110" // /* MW 2 */
+ 12439 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12441 "00101010" // /* MW 3 */
+ 12442 "11101100" // /* MW 2 */
+ 12443 "00000111" // /* MW 1 */
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+ 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12449 "00000000" // /* MW 1 */
+ 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12451 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12453 "00000001" // /* MW 5 */
+ 12454 "00000000" // /* MW 4 */
+ 12455 "10011000" // /* MW 3 */
+ 12456 "00011110" // /* MW 2 */
+ 12457 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12459 "01000011" // /* MW 5 */
+ 12460 "10111110" // /* MW 4 */
+ 12461 "10111000" // /* MW 3 */
+ 12462 "11001010" // /* MW 2 */
+ 12463 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12465 "00010001" // /* MW 5 */
+ 12466 "11000010" // /* MW 4 */
+ 12467 "10110000" // /* MW 3 */
+ 12468 "10000110" // /* MW 2 */
+ 12469 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12471 "00010101" // /* MW 5 */
+ 12472 "11101111" // /* MW 4 */
+ 12473 "10110111" // /* MW 3 */
+ 12474 "01000010" // /* MW 2 */
+ 12475 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12477 "11110001" // /* MW 3 */
+ 12478 "00100010" // /* MW 2 */
+ 12479 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12481 "00000000" // /* MW 15 */
+ 12482 "00000000" // /* MW 14 */
+ 12483 "01111000" // /* MW 13 */
+ 12484 "10100101" // /* MW 12 */
+ 12485 "00000001" // /* MW 11 */
+ 12486 "10010000" // /* MW 10 */
+ 12487 "00001000" // /* MW 9 */
+ 12488 "00011110" // /* MW 8 */
+ 12489 "01011011" // /* MW 7 */
+ 12490 "00000001" // /* MW 6 */
+ 12491 "00100000" // /* MW 5 */
+ 12492 "00000000" // /* MW 4 */
+ 12493 "11110000" // /* MW 3 */
+ 12494 "00101100" // /* MW 2 */
+ 12495 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12497 "00000010" // /* MW 5 */
+ 12498 "01000000" // /* MW 4 */
+ 12499 "00100000" // /* MW 3 */
+ 12500 "11010010" // /* MW 2 */
+ 12501 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12503 "01000011" // /* MW 5 */
+ 12504 "01001000" // /* MW 4 */
+ 12505 "01011000" // /* MW 3 */
+ 12506 "11000101" // /* MW 2 */
+ 12507 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12509 "01101010" // /* MW 3 */
+ 12510 "11101110" // /* MW 2 */
+ 12511 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12513 "00110001" // /* MW 3 */
+ 12514 "11101100" // /* MW 2 */
+ 12515 "00000111" // /* MW 1 */
+ 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12517 "00000000" // /* MW 1 */
+ 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12519 "00000000" // /* MW 1 */
+ 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12521 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12523 "01000110" // /* MW 3 */
+ 12524 "11101001" // /* MW 2 */
+ 12525 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12527 "00001010" // /* MW 3 */
+ 12528 "00110111" // /* MW 2 */
+ 12529 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12531 "01100011" // /* MW 5 */
+ 12532 "11000110" // /* MW 4 */
+ 12533 "10111000" // /* MW 3 */
+ 12534 "01001110" // /* MW 2 */
+ 12535 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12537 "01000001" // /* MW 9 */
+ 12538 "00000000" // /* MW 8 */
+ 12539 "00000000" // /* MW 7 */
+ 12540 "10100110" // /* MW 6 */
+ 12541 "00000111" // /* MW 5 */
+ 12542 "00000000" // /* MW 4 */
+ 12543 "10110000" // /* MW 3 */
+ 12544 "01000110" // /* MW 2 */
+ 12545 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12547 "00100010" // /* MW 3 */
+ 12548 "10101001" // /* MW 2 */
+ 12549 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12551 "00001010" // /* MW 3 */
+ 12552 "01110111" // /* MW 2 */
+ 12553 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12555 "00010001" // /* MW 3 */
+ 12556 "00100101" // /* MW 2 */
+ 12557 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12559 "01110000" // /* MW 3 */
+ 12560 "00100110" // /* MW 2 */
+ 12561 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12563 "01100000" // /* MW 13 */
+ 12564 "00101011" // /* MW 12 */
+ 12565 "00000000" // /* MW 11 */
+ 12566 "00001001" // /* MW 10 */
+ 12567 "10011000" // /* MW 9 */
+ 12568 "00111101" // /* MW 8 */
+ 12569 "00100010" // /* MW 7 */
+ 12570 "01000001" // /* MW 6 */
+ 12571 "00100100" // /* MW 5 */
+ 12572 "00000000" // /* MW 4 */
+ 12573 "11110000" // /* MW 3 */
+ 12574 "00101100" // /* MW 2 */
+ 12575 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12577 "01011000" // /* MW 9 */
+ 12578 "01000010" // /* MW 8 */
+ 12579 "00000000" // /* MW 7 */
+ 12580 "11001000" // /* MW 6 */
+ 12581 "00110111" // /* MW 5 */
+ 12582 "00111111" // /* MW 4 */
+ 12583 "00100000" // /* MW 3 */
+ 12584 "00001110" // /* MW 2 */
+ 12585 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12587 "01011000" // /* MW 9 */
+ 12588 "11111100" // /* MW 8 */
+ 12589 "00101001" // /* MW 7 */
+ 12590 "00001000" // /* MW 6 */
+ 12591 "10000000" // /* MW 5 */
+ 12592 "00000001" // /* MW 4 */
+ 12593 "00100000" // /* MW 3 */
+ 12594 "11000010" // /* MW 2 */
+ 12595 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12597 "01011000" // /* MW 9 */
+ 12598 "00000010" // /* MW 8 */
+ 12599 "10001000" // /* MW 7 */
+ 12600 "10001000" // /* MW 6 */
+ 12601 "01100000" // /* MW 5 */
+ 12602 "00000000" // /* MW 4 */
+ 12603 "00100000" // /* MW 3 */
+ 12604 "11011010" // /* MW 2 */
+ 12605 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12607 "01011000" // /* MW 9 */
+ 12608 "00010111" // /* MW 8 */
+ 12609 "10001000" // /* MW 7 */
+ 12610 "00001011" // /* MW 6 */
+ 12611 "01010001" // /* MW 5 */
+ 12612 "00000000" // /* MW 4 */
+ 12613 "01010000" // /* MW 3 */
+ 12614 "01000101" // /* MW 2 */
+ 12615 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12617 "01011000" // /* MW 9 */
+ 12618 "00100000" // /* MW 8 */
+ 12619 "10000000" // /* MW 7 */
+ 12620 "01001000" // /* MW 6 */
+ 12621 "00100111" // /* MW 5 */
+ 12622 "00111111" // /* MW 4 */
+ 12623 "00100000" // /* MW 3 */
+ 12624 "01010110" // /* MW 2 */
+ 12625 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12627 "01011000" // /* MW 9 */
+ 12628 "00000001" // /* MW 8 */
+ 12629 "01001000" // /* MW 7 */
+ 12630 "11001011" // /* MW 6 */
+ 12631 "01110000" // /* MW 5 */
+ 12632 "00000001" // /* MW 4 */
+ 12633 "00100000" // /* MW 3 */
+ 12634 "01111010" // /* MW 2 */
+ 12635 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12637 "01011000" // /* MW 9 */
+ 12638 "11000000" // /* MW 8 */
+ 12639 "11101111" // /* MW 7 */
+ 12640 "00001011" // /* MW 6 */
+ 12641 "11010000" // /* MW 5 */
+ 12642 "00000101" // /* MW 4 */
+ 12643 "10000000" // /* MW 3 */
+ 12644 "11000000" // /* MW 2 */
+ 12645 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12647 "00100001" // /* MW 3 */
+ 12648 "00101000" // /* MW 2 */
+ 12649 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12651 "00000110" // /* MW 3 */
+ 12652 "11000111" // /* MW 2 */
+ 12653 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12655 "00000010" // /* MW 5 */
+ 12656 "00110110" // /* MW 4 */
+ 12657 "01010000" // /* MW 3 */
+ 12658 "11110001" // /* MW 2 */
+ 12659 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12661 "11110101" // /* MW 5 */
+ 12662 "00111111" // /* MW 4 */
+ 12663 "01001011" // /* MW 3 */
+ 12664 "00101000" // /* MW 2 */
+ 12665 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12667 "00011101" // /* MW 5 */
+ 12668 "00100000" // /* MW 4 */
+ 12669 "11110001" // /* MW 3 */
+ 12670 "11100001" // /* MW 2 */
+ 12671 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12673 "01110000" // /* MW 3 */
+ 12674 "00101000" // /* MW 2 */
+ 12675 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12677 "00000001" // /* MW 5 */
+ 12678 "10100000" // /* MW 4 */
+ 12679 "10010000" // /* MW 3 */
+ 12680 "00000000" // /* MW 2 */
+ 12681 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000001" // /* MW 5 */
+ 12684 "10110100" // /* MW 4 */
+ 12685 "10111101" // /* MW 3 */
+ 12686 "11100111" // /* MW 2 */
+ 12687 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12689 "00000010" // /* MW 5 */
+ 12690 "10100011" // /* MW 4 */
+ 12691 "10110000" // /* MW 3 */
+ 12692 "00001101" // /* MW 2 */
+ 12693 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12695 "11111111" // /* MW 5 */
+ 12696 "00110101" // /* MW 4 */
+ 12697 "10110000" // /* MW 3 */
+ 12698 "11001101" // /* MW 2 */
+ 12699 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12701 "00001111" // /* MW 3 */
+ 12702 "11001101" // /* MW 2 */
+ 12703 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12705 "00011111" // /* MW 3 */
+ 12706 "11011111" // /* MW 2 */
+ 12707 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12709 "11111111" // /* MW 5 */
+ 12710 "10110011" // /* MW 4 */
+ 12711 "11111001" // /* MW 3 */
+ 12712 "01101011" // /* MW 2 */
+ 12713 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12715 "00000111" // /* MW 3 */
+ 12716 "00110111" // /* MW 2 */
+ 12717 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12719 "11011111" // /* MW 5 */
+ 12720 "10010000" // /* MW 4 */
+ 12721 "00110111" // /* MW 3 */
+ 12722 "11010110" // /* MW 2 */
+ 12723 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12725 "01010010" // /* MW 3 */
+ 12726 "00111000" // /* MW 2 */
+ 12727 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12729 "00101101" // /* MW 3 */
+ 12730 "00100101" // /* MW 2 */
+ 12731 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00111111" // /* MW 5 */
+ 12734 "11001000" // /* MW 4 */
+ 12735 "00111000" // /* MW 3 */
+ 12736 "01001010" // /* MW 2 */
+ 12737 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12739 "11111011" // /* MW 5 */
+ 12740 "01110010" // /* MW 4 */
+ 12741 "00111111" // /* MW 3 */
+ 12742 "11110010" // /* MW 2 */
+ 12743 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12745 "00011111" // /* MW 5 */
+ 12746 "01110000" // /* MW 4 */
+ 12747 "00111001" // /* MW 3 */
+ 12748 "11110010" // /* MW 2 */
+ 12749 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12751 "11111011" // /* MW 5 */
+ 12752 "11001110" // /* MW 4 */
+ 12753 "00111001" // /* MW 3 */
+ 12754 "11001110" // /* MW 2 */
+ 12755 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12757 "11101010" // /* MW 5 */
+ 12758 "10110011" // /* MW 4 */
+ 12759 "10111001" // /* MW 3 */
+ 12760 "00110101" // /* MW 2 */
+ 12761 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12763 "01011011" // /* MW 5 */
+ 12764 "01111011" // /* MW 4 */
+ 12765 "00111001" // /* MW 3 */
+ 12766 "11111110" // /* MW 2 */
+ 12767 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12769 "11100010" // /* MW 5 */
+ 12770 "00110011" // /* MW 4 */
+ 12771 "11111001" // /* MW 3 */
+ 12772 "00100001" // /* MW 2 */
+ 12773 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12775 "00000100" // /* MW 5 */
+ 12776 "11110011" // /* MW 4 */
+ 12777 "00111111" // /* MW 3 */
+ 12778 "10000010" // /* MW 2 */
+ 12779 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12781 "01101101" // /* MW 3 */
+ 12782 "11111111" // /* MW 2 */
+ 12783 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12785 "11111111" // /* MW 5 */
+ 12786 "10111111" // /* MW 4 */
+ 12787 "00111001" // /* MW 3 */
+ 12788 "01100110" // /* MW 2 */
+ 12789 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12791 "11011011" // /* MW 5 */
+ 12792 "11000110" // /* MW 4 */
+ 12793 "00111000" // /* MW 3 */
+ 12794 "10000110" // /* MW 2 */
+ 12795 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12797 "11111111" // /* MW 5 */
+ 12798 "00110001" // /* MW 4 */
+ 12799 "00111001" // /* MW 3 */
+ 12800 "10100100" // /* MW 2 */
+ 12801 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12803 "11000011" // /* MW 5 */
+ 12804 "11011011" // /* MW 4 */
+ 12805 "00110011" // /* MW 3 */
+ 12806 "11011010" // /* MW 2 */
+ 12807 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12809 "01011011" // /* MW 5 */
+ 12810 "01000011" // /* MW 4 */
+ 12811 "00111000" // /* MW 3 */
+ 12812 "11001010" // /* MW 2 */
+ 12813 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12815 "01011011" // /* MW 5 */
+ 12816 "11111100" // /* MW 4 */
+ 12817 "00111001" // /* MW 3 */
+ 12818 "10011110" // /* MW 2 */
+ 12819 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12821 "11000001" // /* MW 5 */
+ 12822 "11011010" // /* MW 4 */
+ 12823 "00111110" // /* MW 3 */
+ 12824 "11001110" // /* MW 2 */
+ 12825 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12827 "11110010" // /* MW 5 */
+ 12828 "10111111" // /* MW 4 */
+ 12829 "00011110" // /* MW 3 */
+ 12830 "00100000" // /* MW 2 */
+ 12831 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12833 "10100011" // /* MW 5 */
+ 12834 "01000011" // /* MW 4 */
+ 12835 "00111000" // /* MW 3 */
+ 12836 "11011010" // /* MW 2 */
+ 12837 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12839 "01011001" // /* MW 9 */
+ 12840 "11111111" // /* MW 8 */
+ 12841 "00001111" // /* MW 7 */
+ 12842 "01101110" // /* MW 6 */
+ 12843 "01101101" // /* MW 5 */
+ 12844 "00011111" // /* MW 4 */
+ 12845 "00110000" // /* MW 3 */
+ 12846 "11000010" // /* MW 2 */
+ 12847 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12849 "10000001" // /* MW 5 */
+ 12850 "01101010" // /* MW 4 */
+ 12851 "00111110" // /* MW 3 */
+ 12852 "11001010" // /* MW 2 */
+ 12853 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12855 "11000011" // /* MW 5 */
+ 12856 "01010010" // /* MW 4 */
+ 12857 "00111010" // /* MW 3 */
+ 12858 "11101010" // /* MW 2 */
+ 12859 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12861 "00001000" // /* MW 11 */
+ 12862 "00010000" // /* MW 10 */
+ 12863 "01101101" // /* MW 9 */
+ 12864 "10110010" // /* MW 8 */
+ 12865 "00001000" // /* MW 7 */
+ 12866 "10101011" // /* MW 6 */
+ 12867 "01110001" // /* MW 5 */
+ 12868 "00011110" // /* MW 4 */
+ 12869 "00000111" // /* MW 3 */
+ 12870 "00010001" // /* MW 2 */
+ 12871 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "01110001" // /* MW 3 */
+ 12874 "00011110" // /* MW 2 */
+ 12875 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12877 "11111011" // /* MW 5 */
+ 12878 "01010010" // /* MW 4 */
+ 12879 "00111000" // /* MW 3 */
+ 12880 "11000110" // /* MW 2 */
+ 12881 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12883 "10000011" // /* MW 5 */
+ 12884 "01000010" // /* MW 4 */
+ 12885 "00111100" // /* MW 3 */
+ 12886 "11000010" // /* MW 2 */
+ 12887 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12889 "11111011" // /* MW 5 */
+ 12890 "01010010" // /* MW 4 */
+ 12891 "00111001" // /* MW 3 */
+ 12892 "11000110" // /* MW 2 */
+ 12893 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12895 "10000011" // /* MW 5 */
+ 12896 "01000010" // /* MW 4 */
+ 12897 "00111100" // /* MW 3 */
+ 12898 "11000010" // /* MW 2 */
+ 12899 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12901 "01010001" // /* MW 3 */
+ 12902 "00011110" // /* MW 2 */
+ 12903 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12905 "00110001" // /* MW 3 */
+ 12906 "00011110" // /* MW 2 */
+ 12907 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12909 "00010001" // /* MW 3 */
+ 12910 "00001010" // /* MW 2 */
+ 12911 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12913 "00001010" // /* MW 3 */
+ 12914 "00000110" // /* MW 2 */
+ 12915 "00000111" // /* MW 1 */
+ 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12917 "00000000" // /* MW 1 */
+ 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12919 "00000000" // /* MW 1 */
+ 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12921 "00000000" // /* MW 1 */
+ 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12923 "00000000" // /* MW 1 */
+ 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12925 "00000000" // /* MW 1 */
+ 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12927 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */
+ 12929 "00000001" // /* MW 5 */
+ 12930 "00000000" // /* MW 4 */
+ 12931 "01010000" // /* MW 3 */
+ 12932 "00011001" // /* MW 2 */
+ 12933 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12935 "01100000" // /* MW 3 */
+ 12936 "00111011" // /* MW 2 */
+ 12937 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12939 "00000000" // /* MW 5 */
+ 12940 "10100000" // /* MW 4 */
+ 12941 "00001001" // /* MW 3 */
+ 12942 "01111111" // /* MW 2 */
+ 12943 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12949 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12951 "00000001" // /* MW 9 */
+ 12952 "00100110" // /* MW 8 */
+ 12953 "00000000" // /* MW 7 */
+ 12954 "00000000" // /* MW 6 */
+ 12955 "01011011" // /* MW 5 */
+ 12956 "00000001" // /* MW 4 */
+ 12957 "11110000" // /* MW 3 */
+ 12958 "00101100" // /* MW 2 */
+ 12959 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12961 "00010000" // /* MW 9 */
+ 12962 "00110100" // /* MW 8 */
+ 12963 "00110010" // /* MW 7 */
+ 12964 "11110000" // /* MW 6 */
+ 12965 "00000001" // /* MW 5 */
+ 12966 "00000000" // /* MW 4 */
+ 12967 "00100000" // /* MW 3 */
+ 12968 "10000111" // /* MW 2 */
+ 12969 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12971 "11100010" // /* MW 5 */
+ 12972 "00000100" // /* MW 4 */
+ 12973 "01010000" // /* MW 3 */
+ 12974 "11000000" // /* MW 2 */
+ 12975 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12977 "11101001" // /* MW 5 */
+ 12978 "00000010" // /* MW 4 */
+ 12979 "00100001" // /* MW 3 */
+ 12980 "10000011" // /* MW 2 */
+ 12981 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12983 "00100101" // /* MW 5 */
+ 12984 "00000001" // /* MW 4 */
+ 12985 "00100000" // /* MW 3 */
+ 12986 "00111110" // /* MW 2 */
+ 12987 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12989 "00000001" // /* MW 5 */
+ 12990 "00000000" // /* MW 4 */
+ 12991 "00000000" // /* MW 3 */
+ 12992 "11111000" // /* MW 2 */
+ 12993 "11111111" // /* MW 1 */
+ 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12995 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12997 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12999 "00010111" // /* MW 3 */
+ 13000 "00000010" // /* MW 2 */
+ 13001 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13003 "01000001" // /* MW 5 */
+ 13004 "01110000" // /* MW 4 */
+ 13005 "00001111" // /* MW 3 */
+ 13006 "00000000" // /* MW 2 */
+ 13007 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13009 "00010110" // /* MW 3 */
+ 13010 "01000000" // /* MW 2 */
+ 13011 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13013 "11000000" // /* MW 3 */
+ 13014 "01100000" // /* MW 2 */
+ 13015 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13017 "00000001" // /* MW 3 */
+ 13018 "00000001" // /* MW 2 */
+ 13019 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 13023 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13025 "11000000" // /* MW 3 */
+ 13026 "01010110" // /* MW 2 */
+ 13027 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13029 "10101001" // /* MW 5 */
+ 13030 "00000001" // /* MW 4 */
+ 13031 "11011110" // /* MW 3 */
+ 13032 "10010011" // /* MW 2 */
+ 13033 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13035 "00000010" // /* MW 5 */
+ 13036 "11010001" // /* MW 4 */
+ 13037 "11010110" // /* MW 3 */
+ 13038 "10000011" // /* MW 2 */
+ 13039 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13041 "10001010" // /* MW 3 */
+ 13042 "11101000" // /* MW 2 */
+ 13043 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13045 "01000110" // /* MW 3 */
+ 13046 "11111101" // /* MW 2 */
+ 13047 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13049 "00100110" // /* MW 3 */
+ 13050 "00111101" // /* MW 2 */
+ 13051 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13053 "01000110" // /* MW 3 */
+ 13054 "11111111" // /* MW 2 */
+ 13055 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13057 "00100110" // /* MW 3 */
+ 13058 "00101111" // /* MW 2 */
+ 13059 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13061 "00000110" // /* MW 3 */
+ 13062 "00101101" // /* MW 2 */
+ 13063 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13065 "01000110" // /* MW 3 */
+ 13066 "11111100" // /* MW 2 */
+ 13067 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13069 "00100110" // /* MW 3 */
+ 13070 "00111100" // /* MW 2 */
+ 13071 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13073 "01000110" // /* MW 3 */
+ 13074 "11111110" // /* MW 2 */
+ 13075 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13077 "00100110" // /* MW 3 */
+ 13078 "00101110" // /* MW 2 */
+ 13079 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13081 "00000110" // /* MW 3 */
+ 13082 "00101100" // /* MW 2 */
+ 13083 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13085 "11000110" // /* MW 3 */
+ 13086 "11111100" // /* MW 2 */
+ 13087 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13089 "10100110" // /* MW 3 */
+ 13090 "00111100" // /* MW 2 */
+ 13091 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13093 "11000110" // /* MW 3 */
+ 13094 "11111110" // /* MW 2 */
+ 13095 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13097 "10100110" // /* MW 3 */
+ 13098 "00101110" // /* MW 2 */
+ 13099 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13101 "10000110" // /* MW 3 */
+ 13102 "00101100" // /* MW 2 */
+ 13103 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13105 "11000110" // /* MW 3 */
+ 13106 "11111111" // /* MW 2 */
+ 13107 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13109 "10100110" // /* MW 3 */
+ 13110 "00101111" // /* MW 2 */
+ 13111 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13113 "00010000" // /* MW 9 */
+ 13114 "00110100" // /* MW 8 */
+ 13115 "00110010" // /* MW 7 */
+ 13116 "11110010" // /* MW 6 */
+ 13117 "00000001" // /* MW 5 */
+ 13118 "00000000" // /* MW 4 */
+ 13119 "11010000" // /* MW 3 */
+ 13120 "11110000" // /* MW 2 */
+ 13121 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13123 "10000001" // /* MW 5 */
+ 13124 "11000101" // /* MW 4 */
+ 13125 "01011000" // /* MW 3 */
+ 13126 "10011000" // /* MW 2 */
+ 13127 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13129 "00010000" // /* MW 3 */
+ 13130 "00001111" // /* MW 2 */
+ 13131 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13133 "01011000" // /* MW 11 */
+ 13134 "00000000" // /* MW 10 */
+ 13135 "01100000" // /* MW 9 */
+ 13136 "01101010" // /* MW 8 */
+ 13137 "00100000" // /* MW 7 */
+ 13138 "00000000" // /* MW 6 */
+ 13139 "01101000" // /* MW 5 */
+ 13140 "00111011" // /* MW 4 */
+ 13141 "01110000" // /* MW 3 */
+ 13142 "10000101" // /* MW 2 */
+ 13143 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 13145 "01100000" // /* MW 13 */
+ 13146 "00001001" // /* MW 12 */
+ 13147 "01100010" // /* MW 11 */
+ 13148 "00001011" // /* MW 10 */
+ 13149 "00010000" // /* MW 9 */
+ 13150 "11100000" // /* MW 8 */
+ 13151 "00101101" // /* MW 7 */
+ 13152 "00000100" // /* MW 6 */
+ 13153 "11101001" // /* MW 5 */
+ 13154 "00111000" // /* MW 4 */
+ 13155 "11010000" // /* MW 3 */
+ 13156 "10111000" // /* MW 2 */
+ 13157 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13159 "01110010" // /* MW 9 */
+ 13160 "10010000" // /* MW 8 */
+ 13161 "10000000" // /* MW 7 */
+ 13162 "00000010" // /* MW 6 */
+ 13163 "01001011" // /* MW 5 */
+ 13164 "00001100" // /* MW 4 */
+ 13165 "11010001" // /* MW 3 */
+ 13166 "10110100" // /* MW 2 */
+ 13167 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13169 "01111110" // /* MW 9 */
+ 13170 "11000000" // /* MW 8 */
+ 13171 "11100001" // /* MW 7 */
+ 13172 "00000011" // /* MW 6 */
+ 13173 "10010000" // /* MW 5 */
+ 13174 "10101011" // /* MW 4 */
+ 13175 "11010001" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13179 "01011110" // /* MW 9 */
+ 13180 "10010000" // /* MW 8 */
+ 13181 "00000111" // /* MW 7 */
+ 13182 "00000010" // /* MW 6 */
+ 13183 "11110100" // /* MW 5 */
+ 13184 "11110000" // /* MW 4 */
+ 13185 "11010001" // /* MW 3 */
+ 13186 "00001010" // /* MW 2 */
+ 13187 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13189 "10000010" // /* MW 5 */
+ 13190 "00000000" // /* MW 4 */
+ 13191 "01010000" // /* MW 3 */
+ 13192 "00011110" // /* MW 2 */
+ 13193 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13195 "00010000" // /* MW 11 */
+ 13196 "11111000" // /* MW 10 */
+ 13197 "01111001" // /* MW 9 */
+ 13198 "00001100" // /* MW 8 */
+ 13199 "00000000" // /* MW 7 */
+ 13200 "00000000" // /* MW 6 */
+ 13201 "01001011" // /* MW 5 */
+ 13202 "00010000" // /* MW 4 */
+ 13203 "11010110" // /* MW 3 */
+ 13204 "11000000" // /* MW 2 */
+ 13205 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13207 "00010000" // /* MW 11 */
+ 13208 "00101000" // /* MW 10 */
+ 13209 "10111010" // /* MW 9 */
+ 13210 "00001101" // /* MW 8 */
+ 13211 "00000000" // /* MW 7 */
+ 13212 "00000000" // /* MW 6 */
+ 13213 "01001011" // /* MW 5 */
+ 13214 "00010000" // /* MW 4 */
+ 13215 "11010010" // /* MW 3 */
+ 13216 "10010010" // /* MW 2 */
+ 13217 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13219 "00000101" // /* MW 5 */
+ 13220 "01100001" // /* MW 4 */
+ 13221 "10000100" // /* MW 3 */
+ 13222 "00010110" // /* MW 2 */
+ 13223 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13225 "10001010" // /* MW 3 */
+ 13226 "00000000" // /* MW 2 */
+ 13227 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13229 "00010000" // /* MW 9 */
+ 13230 "01001000" // /* MW 8 */
+ 13231 "10110010" // /* MW 7 */
+ 13232 "00001101" // /* MW 6 */
+ 13233 "00000000" // /* MW 5 */
+ 13234 "00000000" // /* MW 4 */
+ 13235 "11010000" // /* MW 3 */
+ 13236 "10010110" // /* MW 2 */
+ 13237 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13239 "10101000" // /* MW 9 */
+ 13240 "00000001" // /* MW 8 */
+ 13241 "10001110" // /* MW 7 */
+ 13242 "00001010" // /* MW 6 */
+ 13243 "00010100" // /* MW 5 */
+ 13244 "00000000" // /* MW 4 */
+ 13245 "11110000" // /* MW 3 */
+ 13246 "00101100" // /* MW 2 */
+ 13247 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13249 "00000000" // /* MW 15 */
+ 13250 "00000000" // /* MW 14 */
+ 13251 "01111000" // /* MW 13 */
+ 13252 "10111001" // /* MW 12 */
+ 13253 "00001110" // /* MW 11 */
+ 13254 "00001000" // /* MW 10 */
+ 13255 "00110110" // /* MW 9 */
+ 13256 "00000000" // /* MW 8 */
+ 13257 "01011011" // /* MW 7 */
+ 13258 "00000001" // /* MW 6 */
+ 13259 "00100000" // /* MW 5 */
+ 13260 "00000000" // /* MW 4 */
+ 13261 "00000000" // /* MW 3 */
+ 13262 "10010001" // /* MW 2 */
+ 13263 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13265 "01101010" // /* MW 15 */
+ 13266 "01100011" // /* MW 14 */
+ 13267 "10101100" // /* MW 13 */
+ 13268 "00000011" // /* MW 12 */
+ 13269 "00001110" // /* MW 11 */
+ 13270 "00000010" // /* MW 10 */
+ 13271 "11010100" // /* MW 9 */
+ 13272 "00001101" // /* MW 8 */
+ 13273 "01001011" // /* MW 7 */
+ 13274 "00010000" // /* MW 6 */
+ 13275 "00100000" // /* MW 5 */
+ 13276 "00000000" // /* MW 4 */
+ 13277 "11110000" // /* MW 3 */
+ 13278 "00101100" // /* MW 2 */
+ 13279 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13281 "00011010" // /* MW 15 */
+ 13282 "01001000" // /* MW 14 */
+ 13283 "11001100" // /* MW 13 */
+ 13284 "00111111" // /* MW 12 */
+ 13285 "10111001" // /* MW 11 */
+ 13286 "11011010" // /* MW 10 */
+ 13287 "00101111" // /* MW 9 */
+ 13288 "00000100" // /* MW 8 */
+ 13289 "01001011" // /* MW 7 */
+ 13290 "00010000" // /* MW 6 */
+ 13291 "00100101" // /* MW 5 */
+ 13292 "00000000" // /* MW 4 */
+ 13293 "11010000" // /* MW 3 */
+ 13294 "10100011" // /* MW 2 */
+ 13295 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13367 "01101110" // /* MW 9 */
+ 13368 "01000001" // /* MW 8 */
+ 13369 "00011000" // /* MW 7 */
+ 13370 "00000001" // /* MW 6 */
+ 13371 "00010000" // /* MW 5 */
+ 13372 "00000000" // /* MW 4 */
+ 13373 "11110000" // /* MW 3 */
+ 13374 "00101100" // /* MW 2 */
+ 13375 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "01101010" // /* MW 15 */
+ 13378 "01100011" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13393 "00011010" // /* MW 15 */
+ 13394 "01001000" // /* MW 14 */
+ 13395 "01111100" // /* MW 13 */
+ 13396 "10100101" // /* MW 12 */
+ 13397 "00000001" // /* MW 11 */
+ 13398 "00000000" // /* MW 10 */
+ 13399 "00000000" // /* MW 9 */
+ 13400 "00000000" // /* MW 8 */
+ 13401 "01011011" // /* MW 7 */
+ 13402 "00000001" // /* MW 6 */
+ 13403 "00100000" // /* MW 5 */
+ 13404 "00000000" // /* MW 4 */
+ 13405 "11110000" // /* MW 3 */
+ 13406 "00101100" // /* MW 2 */
+ 13407 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13409 "01101110" // /* MW 9 */
+ 13410 "10000001" // /* MW 8 */
+ 13411 "10000100" // /* MW 7 */
+ 13412 "00000010" // /* MW 6 */
+ 13413 "10010000" // /* MW 5 */
+ 13414 "01110011" // /* MW 4 */
+ 13415 "11110100" // /* MW 3 */
+ 13416 "00001100" // /* MW 2 */
+ 13417 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13419 "00000001" // /* MW 7 */
+ 13420 "10001001" // /* MW 6 */
+ 13421 "10001010" // /* MW 5 */
+ 13422 "01000110" // /* MW 4 */
+ 13423 "00001011" // /* MW 3 */
+ 13424 "10011100" // /* MW 2 */
+ 13425 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13427 "00000001" // /* MW 7 */
+ 13428 "00110101" // /* MW 6 */
+ 13429 "10001001" // /* MW 5 */
+ 13430 "11000110" // /* MW 4 */
+ 13431 "10000110" // /* MW 3 */
+ 13432 "00110000" // /* MW 2 */
+ 13433 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13435 "00000110" // /* MW 3 */
+ 13436 "10001001" // /* MW 2 */
+ 13437 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13439 "10100001" // /* MW 7 */
+ 13440 "01001000" // /* MW 6 */
+ 13441 "10001100" // /* MW 5 */
+ 13442 "01000110" // /* MW 4 */
+ 13443 "00001111" // /* MW 3 */
+ 13444 "10011100" // /* MW 2 */
+ 13445 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13447 "10100001" // /* MW 9 */
+ 13448 "00110110" // /* MW 8 */
+ 13449 "10001010" // /* MW 7 */
+ 13450 "11000010" // /* MW 6 */
+ 13451 "10001110" // /* MW 5 */
+ 13452 "10110000" // /* MW 4 */
+ 13453 "11110100" // /* MW 3 */
+ 13454 "00101100" // /* MW 2 */
+ 13455 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13457 "00011101" // /* MW 5 */
+ 13458 "00010010" // /* MW 4 */
+ 13459 "10001011" // /* MW 3 */
+ 13460 "00011110" // /* MW 2 */
+ 13461 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13463 "11100001" // /* MW 9 */
+ 13464 "10010010" // /* MW 8 */
+ 13465 "10001011" // /* MW 7 */
+ 13466 "00000010" // /* MW 6 */
+ 13467 "01010100" // /* MW 5 */
+ 13468 "10110111" // /* MW 4 */
+ 13469 "00000001" // /* MW 3 */
+ 13470 "00000000" // /* MW 2 */
+ 13471 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13473 "11100001" // /* MW 11 */
+ 13474 "01010110" // /* MW 10 */
+ 13475 "10001000" // /* MW 9 */
+ 13476 "00000010" // /* MW 8 */
+ 13477 "01001111" // /* MW 7 */
+ 13478 "10001111" // /* MW 6 */
+ 13479 "00000001" // /* MW 5 */
+ 13480 "00000000" // /* MW 4 */
+ 13481 "01110000" // /* MW 3 */
+ 13482 "10000101" // /* MW 2 */
+ 13483 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13485 "01111111" // /* MW 3 */
+ 13486 "01110010" // /* MW 2 */
+ 13487 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13489 "10011011" // /* MW 3 */
+ 13490 "00011101" // /* MW 2 */
+ 13491 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13493 "01110100" // /* MW 3 */
+ 13494 "00011100" // /* MW 2 */
+ 13495 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13497 "10110100" // /* MW 3 */
+ 13498 "01011000" // /* MW 2 */
+ 13499 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13501 "10010110" // /* MW 3 */
+ 13502 "00010001" // /* MW 2 */
+ 13503 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13505 "00010110" // /* MW 3 */
+ 13506 "00010000" // /* MW 2 */
+ 13507 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13509 "01101100" // /* MW 3 */
+ 13510 "01010000" // /* MW 2 */
+ 13511 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13513 "00010100" // /* MW 3 */
+ 13514 "01010011" // /* MW 2 */
+ 13515 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13517 "01110000" // /* MW 7 */
+ 13518 "00110110" // /* MW 6 */
+ 13519 "10101000" // /* MW 5 */
+ 13520 "00000010" // /* MW 4 */
+ 13521 "01100000" // /* MW 3 */
+ 13522 "01000010" // /* MW 2 */
+ 13523 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13525 "00000011" // /* MW 3 */
+ 13526 "00011100" // /* MW 2 */
+ 13527 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13529 "01110000" // /* MW 7 */
+ 13530 "01000101" // /* MW 6 */
+ 13531 "10000000" // /* MW 5 */
+ 13532 "00000001" // /* MW 4 */
+ 13533 "01100000" // /* MW 3 */
+ 13534 "01010010" // /* MW 2 */
+ 13535 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13537 "01000001" // /* MW 7 */
+ 13538 "01101101" // /* MW 6 */
+ 13539 "10001100" // /* MW 5 */
+ 13540 "01000110" // /* MW 4 */
+ 13541 "00000111" // /* MW 3 */
+ 13542 "00011100" // /* MW 2 */
+ 13543 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13545 "01000001" // /* MW 7 */
+ 13546 "00000011" // /* MW 6 */
+ 13547 "10001001" // /* MW 5 */
+ 13548 "11000110" // /* MW 4 */
+ 13549 "10000010" // /* MW 3 */
+ 13550 "00110000" // /* MW 2 */
+ 13551 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13553 "01101110" // /* MW 9 */
+ 13554 "10000001" // /* MW 8 */
+ 13555 "10000100" // /* MW 7 */
+ 13556 "00000010" // /* MW 6 */
+ 13557 "11110100" // /* MW 5 */
+ 13558 "11110000" // /* MW 4 */
+ 13559 "01110001" // /* MW 3 */
+ 13560 "10110011" // /* MW 2 */
+ 13561 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13563 "00000001" // /* MW 9 */
+ 13564 "10001001" // /* MW 8 */
+ 13565 "10001010" // /* MW 7 */
+ 13566 "01000110" // /* MW 6 */
+ 13567 "00001011" // /* MW 5 */
+ 13568 "10011100" // /* MW 4 */
+ 13569 "11101010" // /* MW 3 */
+ 13570 "00111000" // /* MW 2 */
+ 13571 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13573 "00000001" // /* MW 9 */
+ 13574 "00110101" // /* MW 8 */
+ 13575 "10001001" // /* MW 7 */
+ 13576 "11000110" // /* MW 6 */
+ 13577 "10000110" // /* MW 5 */
+ 13578 "00110000" // /* MW 4 */
+ 13579 "01101010" // /* MW 3 */
+ 13580 "10110001" // /* MW 2 */
+ 13581 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13583 "00000110" // /* MW 3 */
+ 13584 "10001001" // /* MW 2 */
+ 13585 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13587 "10100001" // /* MW 7 */
+ 13588 "01001000" // /* MW 6 */
+ 13589 "10001100" // /* MW 5 */
+ 13590 "11000110" // /* MW 4 */
+ 13591 "10001110" // /* MW 3 */
+ 13592 "10110000" // /* MW 2 */
+ 13593 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13595 "10100001" // /* MW 7 */
+ 13596 "00110110" // /* MW 6 */
+ 13597 "10001010" // /* MW 5 */
+ 13598 "01000110" // /* MW 4 */
+ 13599 "00001111" // /* MW 3 */
+ 13600 "10011100" // /* MW 2 */
+ 13601 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13603 "00001110" // /* MW 3 */
+ 13604 "10001001" // /* MW 2 */
+ 13605 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13607 "11100001" // /* MW 7 */
+ 13608 "10010010" // /* MW 6 */
+ 13609 "10001011" // /* MW 5 */
+ 13610 "01000110" // /* MW 4 */
+ 13611 "00000011" // /* MW 3 */
+ 13612 "00011100" // /* MW 2 */
+ 13613 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13615 "11100001" // /* MW 7 */
+ 13616 "01010110" // /* MW 6 */
+ 13617 "10001000" // /* MW 5 */
+ 13618 "01000110" // /* MW 4 */
+ 13619 "00000111" // /* MW 3 */
+ 13620 "00011100" // /* MW 2 */
+ 13621 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13623 "00000101" // /* MW 5 */
+ 13624 "01100001" // /* MW 4 */
+ 13625 "11110100" // /* MW 3 */
+ 13626 "00101100" // /* MW 2 */
+ 13627 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13629 "01000001" // /* MW 3 */
+ 13630 "01101101" // /* MW 2 */
+ 13631 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13633 "00011010" // /* MW 15 */
+ 13634 "01001000" // /* MW 14 */
+ 13635 "01111100" // /* MW 13 */
+ 13636 "10100101" // /* MW 12 */
+ 13637 "00000001" // /* MW 11 */
+ 13638 "00000000" // /* MW 10 */
+ 13639 "00000000" // /* MW 9 */
+ 13640 "00000000" // /* MW 8 */
+ 13641 "01011011" // /* MW 7 */
+ 13642 "00000001" // /* MW 6 */
+ 13643 "00100000" // /* MW 5 */
+ 13644 "00000000" // /* MW 4 */
+ 13645 "11110000" // /* MW 3 */
+ 13646 "00101100" // /* MW 2 */
+ 13647 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13649 "01101000" // /* MW 11 */
+ 13650 "10000001" // /* MW 10 */
+ 13651 "10000100" // /* MW 9 */
+ 13652 "00000010" // /* MW 8 */
+ 13653 "00100111" // /* MW 7 */
+ 13654 "00000100" // /* MW 6 */
+ 13655 "00100000" // /* MW 5 */
+ 13656 "11100111" // /* MW 4 */
+ 13657 "11111000" // /* MW 3 */
+ 13658 "00001100" // /* MW 2 */
+ 13659 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13661 "00000001" // /* MW 7 */
+ 13662 "10001001" // /* MW 6 */
+ 13663 "10001010" // /* MW 5 */
+ 13664 "01000110" // /* MW 4 */
+ 13665 "00001011" // /* MW 3 */
+ 13666 "10011100" // /* MW 2 */
+ 13667 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13669 "00000001" // /* MW 7 */
+ 13670 "00110101" // /* MW 6 */
+ 13671 "10001001" // /* MW 5 */
+ 13672 "11000110" // /* MW 4 */
+ 13673 "10000110" // /* MW 3 */
+ 13674 "00110000" // /* MW 2 */
+ 13675 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00000110" // /* MW 3 */
+ 13678 "10001001" // /* MW 2 */
+ 13679 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13681 "10100001" // /* MW 7 */
+ 13682 "01001000" // /* MW 6 */
+ 13683 "10001100" // /* MW 5 */
+ 13684 "01000110" // /* MW 4 */
+ 13685 "00001111" // /* MW 3 */
+ 13686 "10011100" // /* MW 2 */
+ 13687 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13689 "10100001" // /* MW 7 */
+ 13690 "00110110" // /* MW 6 */
+ 13691 "10001010" // /* MW 5 */
+ 13692 "11000110" // /* MW 4 */
+ 13693 "10001110" // /* MW 3 */
+ 13694 "10110000" // /* MW 2 */
+ 13695 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "00001110" // /* MW 3 */
+ 13698 "10001001" // /* MW 2 */
+ 13699 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13701 "11100001" // /* MW 3 */
+ 13702 "10010010" // /* MW 2 */
+ 13703 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13705 "11100001" // /* MW 3 */
+ 13706 "01010110" // /* MW 2 */
+ 13707 "10001000" // /* MW 1 */
+ 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13709 "00000000" // /* MW 1 */
+ 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13711 "00000000" // /* MW 1 */
+ 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13713 "00000000" // /* MW 1 */
+ 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13715 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13717 "10010110" // /* MW 3 */
+ 13718 "00010001" // /* MW 2 */
+ 13719 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13721 "00000000" // /* MW 5 */
+ 13722 "01010000" // /* MW 4 */
+ 13723 "11000000" // /* MW 3 */
+ 13724 "00000010" // /* MW 2 */
+ 13725 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13727 "01101100" // /* MW 3 */
+ 13728 "01010000" // /* MW 2 */
+ 13729 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13731 "00010100" // /* MW 3 */
+ 13732 "01010011" // /* MW 2 */
+ 13733 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13735 "01101100" // /* MW 3 */
+ 13736 "01010000" // /* MW 2 */
+ 13737 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13739 "00010011" // /* MW 3 */
+ 13740 "10001010" // /* MW 2 */
+ 13741 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13743 "10010011" // /* MW 3 */
+ 13744 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13745 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13761 "10000000" // /* MW 5 */
+ 13762 "11001000" // /* MW 4 */
+ 13763 "11001000" // /* MW 3 */
+ 13764 "00000111" // /* MW 2 */
+ 13765 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13767 "01000001" // /* MW 5 */
+ 13768 "00101111" // /* MW 4 */
+ 13769 "11010000" // /* MW 3 */
+ 13770 "11000010" // /* MW 2 */
+ 13771 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13773 "00000001" // /* MW 5 */
+ 13774 "00000000" // /* MW 4 */
+ 13775 "00000000" // /* MW 3 */
+ 13776 "00010000" // /* MW 2 */
+ 13777 "00000000" // /* MW 1 */
+ 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13779 "01110000" // /* MW 7 */
+ 13780 "01110000" // /* MW 6 */
+ 13781 "00101101" // /* MW 5 */
+ 13782 "00000010" // /* MW 4 */
+ 13783 "10110000" // /* MW 3 */
+ 13784 "00111010" // /* MW 2 */
+ 13785 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13787 "01110000" // /* MW 7 */
+ 13788 "11110000" // /* MW 6 */
+ 13789 "10101000" // /* MW 5 */
+ 13790 "00000001" // /* MW 4 */
+ 13791 "10110000" // /* MW 3 */
+ 13792 "10110110" // /* MW 2 */
+ 13793 "11111111" // /* MW 1 */
+ 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "00011101" // /* MW 3 */
+ 13796 "11101100" // /* MW 2 */
+ 13797 "00001111" // /* MW 1 */
+ 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13799 "10011101" // /* MW 3 */
+ 13800 "11110111" // /* MW 2 */
+ 13801 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13803 "01110000" // /* MW 7 */
+ 13804 "01100000" // /* MW 6 */
+ 13805 "11001010" // /* MW 5 */
+ 13806 "00000001" // /* MW 4 */
+ 13807 "10110000" // /* MW 3 */
+ 13808 "00000010" // /* MW 2 */
+ 13809 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */
+ 13811 "00000001" // /* MW 5 */
+ 13812 "01000000" // /* MW 4 */
+ 13813 "00111000" // /* MW 3 */
+ 13814 "00011011" // /* MW 2 */
+ 13815 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13817 "11000000" // /* MW 3 */
+ 13818 "11010110" // /* MW 2 */
+ 13819 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13821 "10010000" // /* MW 3 */
+ 13822 "01100010" // /* MW 2 */
+ 13823 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13825 "11111011" // /* MW 3 */
+ 13826 "01100011" // /* MW 2 */
+ 13827 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13829 "10100000" // /* MW 5 */
+ 13830 "11001000" // /* MW 4 */
+ 13831 "11000110" // /* MW 3 */
+ 13832 "00000111" // /* MW 2 */
+ 13833 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13835 "00110001" // /* MW 3 */
+ 13836 "00000110" // /* MW 2 */
+ 13837 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13839 "00010001" // /* MW 9 */
+ 13840 "00110100" // /* MW 8 */
+ 13841 "10110010" // /* MW 7 */
+ 13842 "11110000" // /* MW 6 */
+ 13843 "00000001" // /* MW 5 */
+ 13844 "00000000" // /* MW 4 */
+ 13845 "01100000" // /* MW 3 */
+ 13846 "10010001" // /* MW 2 */
+ 13847 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13849 "00010000" // /* MW 11 */
+ 13850 "00110010" // /* MW 10 */
+ 13851 "10110010" // /* MW 9 */
+ 13852 "11110000" // /* MW 8 */
+ 13853 "00000001" // /* MW 7 */
+ 13854 "00000000" // /* MW 6 */
+ 13855 "10001011" // /* MW 5 */
+ 13856 "10001000" // /* MW 4 */
+ 13857 "11100000" // /* MW 3 */
+ 13858 "11000000" // /* MW 2 */
+ 13859 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13861 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */
+ 13863 "00000001" // /* MW 5 */
+ 13864 "00000000" // /* MW 4 */
+ 13865 "00100000" // /* MW 3 */
+ 13866 "00011000" // /* MW 2 */
+ 13867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13873 "00110001" // /* MW 3 */
+ 13874 "00100000" // /* MW 2 */
+ 13875 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "00000101" // /* MW 3 */
+ 13878 "00100000" // /* MW 2 */
+ 13879 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13881 "01110000" // /* MW 7 */
+ 13882 "10100101" // /* MW 6 */
+ 13883 "00000001" // /* MW 5 */
+ 13884 "00000000" // /* MW 4 */
+ 13885 "00110000" // /* MW 3 */
+ 13886 "11000010" // /* MW 2 */
+ 13887 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13889 "00000000" // /* MW 7 */
+ 13890 "10000010" // /* MW 6 */
+ 13891 "00110011" // /* MW 5 */
+ 13892 "00000001" // /* MW 4 */
+ 13893 "01100000" // /* MW 3 */
+ 13894 "10010001" // /* MW 2 */
+ 13895 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13897 "00111010" // /* MW 3 */
+ 13898 "00000110" // /* MW 2 */
+ 13899 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13901 "00010000" // /* MW 9 */
+ 13902 "00110000" // /* MW 8 */
+ 13903 "00110010" // /* MW 7 */
+ 13904 "11110001" // /* MW 6 */
+ 13905 "00000001" // /* MW 5 */
+ 13906 "00000000" // /* MW 4 */
+ 13907 "01010000" // /* MW 3 */
+ 13908 "11000011" // /* MW 2 */
+ 13909 "01000100" // /* MW 1 */
+ 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13911 "00000000" // /* MW 1 */
+ 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */
+ 13913 "00000000" // /* MW 5 */
+ 13914 "00000000" // /* MW 4 */
+ 13915 "01000000" // /* MW 3 */
+ 13916 "00011011" // /* MW 2 */
+ 13917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13919 "10110000" // /* MW 5 */
+ 13920 "11001000" // /* MW 4 */
+ 13921 "11000110" // /* MW 3 */
+ 13922 "00000111" // /* MW 2 */
+ 13923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13927 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00110001" // /* MW 3 */
+ 13930 "00000110" // /* MW 2 */
+ 13931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13933 "00010001" // /* MW 3 */
+ 13934 "00000110" // /* MW 2 */
+ 13935 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13937 "00000000" // /* MW 15 */
+ 13938 "00000000" // /* MW 14 */
+ 13939 "00010000" // /* MW 13 */
+ 13940 "00101100" // /* MW 12 */
+ 13941 "10110010" // /* MW 11 */
+ 13942 "11110001" // /* MW 10 */
+ 13943 "00000001" // /* MW 9 */
+ 13944 "00000000" // /* MW 8 */
+ 13945 "01011011" // /* MW 7 */
+ 13946 "00000001" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13953 "10000110" // /* MW 3 */
+ 13954 "01100111" // /* MW 2 */
+ 13955 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13957 "00010000" // /* MW 9 */
+ 13958 "00101000" // /* MW 8 */
+ 13959 "00110010" // /* MW 7 */
+ 13960 "11110010" // /* MW 6 */
+ 13961 "00000001" // /* MW 5 */
+ 13962 "00000000" // /* MW 4 */
+ 13963 "11010000" // /* MW 3 */
+ 13964 "11101110" // /* MW 2 */
+ 13965 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13967 "00010110" // /* MW 3 */
+ 13968 "11111110" // /* MW 2 */
+ 13969 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13971 "00110110" // /* MW 3 */
+ 13972 "11111110" // /* MW 2 */
+ 13973 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13975 "01010110" // /* MW 3 */
+ 13976 "01000110" // /* MW 2 */
+ 13977 "00000010" // /* MW 1 */
+ 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13979 "00000000" // /* MW 1 */
+ 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13981 "00000000" // /* MW 1 */
+ 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13983 "00000000" // /* MW 1 */
+ 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13985 "00000000" // /* MW 1 */
+ 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13987 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00000010" // /* MW 3 */
+ 13990 "01100001" // /* MW 2 */
+ 13991 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "00010001" // /* MW 3 */
+ 13994 "00000110" // /* MW 2 */
+ 13995 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "11111101" // /* MW 3 */
+ 13998 "11100000" // /* MW 2 */
+ 13999 "00010111" // /* MW 1 */
+ 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14001 "00000000" // /* MW 1 */
+ 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14003 "00000000" // /* MW 1 */
+ 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14005 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14007 "00001000" // /* MW 3 */
+ 14008 "10010011" // /* MW 2 */
+ 14009 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14011 "00010000" // /* MW 9 */
+ 14012 "00100000" // /* MW 8 */
+ 14013 "10110010" // /* MW 7 */
+ 14014 "11110011" // /* MW 6 */
+ 14015 "00000001" // /* MW 5 */
+ 14016 "00000000" // /* MW 4 */
+ 14017 "00000000" // /* MW 3 */
+ 14018 "00101111" // /* MW 2 */
+ 14019 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14021 "11000001" // /* MW 5 */
+ 14022 "00101011" // /* MW 4 */
+ 14023 "00101000" // /* MW 3 */
+ 14024 "00000000" // /* MW 2 */
+ 14025 "00000110" // /* MW 1 */
+ 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14027 "01011010" // /* MW 3 */
+ 14028 "01101000" // /* MW 2 */
+ 14029 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14031 "10000001" // /* MW 5 */
+ 14032 "00101001" // /* MW 4 */
+ 14033 "00100111" // /* MW 3 */
+ 14034 "11010011" // /* MW 2 */
+ 14035 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00110110" // /* MW 3 */
+ 14038 "00000110" // /* MW 2 */
+ 14039 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14041 "00010000" // /* MW 9 */
+ 14042 "11100000" // /* MW 8 */
+ 14043 "10110011" // /* MW 7 */
+ 14044 "11110001" // /* MW 6 */
+ 14045 "00000001" // /* MW 5 */
+ 14046 "00000000" // /* MW 4 */
+ 14047 "11010000" // /* MW 3 */
+ 14048 "11000010" // /* MW 2 */
+ 14049 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14051 "01010110" // /* MW 3 */
+ 14052 "00000110" // /* MW 2 */
+ 14053 "00000111" // /* MW 1 */
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14061 "01110110" // /* MW 3 */
+ 14062 "00000110" // /* MW 2 */
+ 14063 "00000101" // /* MW 1 */
+ 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14067 "00001111" // /* MW 3 */
+ 14068 "01100001" // /* MW 2 */
+ 14069 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14071 "00000111" // /* MW 3 */
+ 14072 "10100010" // /* MW 2 */
+ 14073 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14075 "11111101" // /* MW 3 */
+ 14076 "00100000" // /* MW 2 */
+ 14077 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */
+ 14079 "00000001" // /* MW 5 */
+ 14080 "00000000" // /* MW 4 */
+ 14081 "01110000" // /* MW 3 */
+ 14082 "00011001" // /* MW 2 */
+ 14083 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14085 "00110001" // /* MW 3 */
+ 14086 "00000110" // /* MW 2 */
+ 14087 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14089 "11000001" // /* MW 3 */
+ 14090 "01001001" // /* MW 2 */
+ 14091 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14093 "00100101" // /* MW 3 */
+ 14094 "10110100" // /* MW 2 */
+ 14095 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "00010101" // /* MW 3 */
+ 14098 "10111011" // /* MW 2 */
+ 14099 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14101 "11000001" // /* MW 11 */
+ 14102 "10001010" // /* MW 10 */
+ 14103 "11011111" // /* MW 9 */
+ 14104 "00000011" // /* MW 8 */
+ 14105 "00000000" // /* MW 7 */
+ 14106 "00000000" // /* MW 6 */
+ 14107 "00100000" // /* MW 5 */
+ 14108 "00000000" // /* MW 4 */
+ 14109 "11110000" // /* MW 3 */
+ 14110 "00101100" // /* MW 2 */
+ 14111 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14113 "00001010" // /* MW 3 */
+ 14114 "01100111" // /* MW 2 */
+ 14115 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14117 "00010110" // /* MW 3 */
+ 14118 "00000110" // /* MW 2 */
+ 14119 "00000010" // /* MW 1 */
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14127 "00000000" // /* MW 1 */
+ 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14129 "00000000" // /* MW 1 */
+ 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14131 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "11111000" // /* MW 3 */
+ 14134 "00010000" // /* MW 2 */
+ 14135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14137 "00010000" // /* MW 9 */
+ 14138 "00110000" // /* MW 8 */
+ 14139 "10110010" // /* MW 7 */
+ 14140 "11110000" // /* MW 6 */
+ 14141 "00000001" // /* MW 5 */
+ 14142 "00000000" // /* MW 4 */
+ 14143 "11010000" // /* MW 3 */
+ 14144 "11000010" // /* MW 2 */
+ 14145 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14147 "01010110" // /* MW 3 */
+ 14148 "00000110" // /* MW 2 */
+ 14149 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14151 "00110110" // /* MW 3 */
+ 14152 "00000110" // /* MW 2 */
+ 14153 "00000111" // /* MW 1 */
+ 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14155 "10011001" // /* MW 3 */
+ 14156 "11110100" // /* MW 2 */
+ 14157 "00000111" // /* MW 1 */
+ 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14159 "11010001" // /* MW 3 */
+ 14160 "11111001" // /* MW 2 */
+ 14161 "00000111" // /* MW 1 */
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14163 "00000000" // /* MW 1 */
+ 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14165 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14167 "00000001" // /* MW 3 */
+ 14168 "11100001" // /* MW 2 */
+ 14169 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14171 "00010001" // /* MW 3 */
+ 14172 "11100110" // /* MW 2 */
+ 14173 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14175 "00101000" // /* MW 3 */
+ 14176 "01100001" // /* MW 2 */
+ 14177 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */
+ 14179 "00000001" // /* MW 5 */
+ 14180 "01000000" // /* MW 4 */
+ 14181 "11000000" // /* MW 3 */
+ 14182 "00011011" // /* MW 2 */
+ 14183 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "00000001" // /* MW 3 */
+ 14186 "00110000" // /* MW 2 */
+ 14187 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14193 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14195 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14197 "11000001" // /* MW 11 */
+ 14198 "10001000" // /* MW 10 */
+ 14199 "10000011" // /* MW 9 */
+ 14200 "00000011" // /* MW 8 */
+ 14201 "00000000" // /* MW 7 */
+ 14202 "00000000" // /* MW 6 */
+ 14203 "00100000" // /* MW 5 */
+ 14204 "00000000" // /* MW 4 */
+ 14205 "11110000" // /* MW 3 */
+ 14206 "00101100" // /* MW 2 */
+ 14207 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14209 "01000001" // /* MW 5 */
+ 14210 "11101101" // /* MW 4 */
+ 14211 "00101110" // /* MW 3 */
+ 14212 "10110110" // /* MW 2 */
+ 14213 "11111111" // /* MW 1 */
+ 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14215 "11110001" // /* MW 3 */
+ 14216 "11110001" // /* MW 2 */
+ 14217 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14219 "00000000" // /* MW 3 */
+ 14220 "00101000" // /* MW 2 */
+ 14221 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14223 "00000001" // /* MW 5 */
+ 14224 "00000000" // /* MW 4 */
+ 14225 "00000000" // /* MW 3 */
+ 14226 "11110000" // /* MW 2 */
+ 14227 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14233 "00000000" // /* MW 1 */
+.delay_slot
+ 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "11000000" // /* MW 3 */
+ 14236 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 14237 "00011111" // /* MW 1 */
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.src_ref 7 "superkernels.cpp" 578
+.src_ref 7 "superkernels.cpp" 578 first
+.function_start
+ 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14241 "00000001" // /* MW 5 */
+ 14242 "00000000" // /* MW 4 */
+ 14243 "00000000" // /* MW 3 */
+ 14244 "00001000" // /* MW 2 */
+ 14245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+ 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14247 "00010001" // /* MW 9 */
+ 14248 "00100000" // /* MW 8 */
+ 14249 "10110010" // /* MW 7 */
+ 14250 "11110011" // /* MW 6 */
+ 14251 "00000001" // /* MW 5 */
+ 14252 "00000000" // /* MW 4 */
+ 14253 "10110000" // /* MW 3 */
+ 14254 "01110011" // /* MW 2 */
+ 14255 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6 first
+ 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14257 "01110010" // /* MW 9 */
+ 14258 "01110000" // /* MW 8 */
+ 14259 "00101101" // /* MW 7 */
+ 14260 "10000010" // /* MW 6 */
+ 14261 "00011101" // /* MW 5 */
+ 14262 "11111111" // /* MW 4 */
+ 14263 "11010111" // /* MW 3 */
+ 14264 "11000010" // /* MW 2 */
+ 14265 "11100000" // /* MW 1 */
+ 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011101" // /* MW 3 */
+ 14268 "11110110" // /* MW 2 */
+ 14269 "00001111" // /* MW 1 */
+ 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011101" // /* MW 3 */
+ 14272 "11110001" // /* MW 2 */
+ 14273 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 599 105
+.src_ref 7 "superkernels.cpp" 629 34
+ 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 14275 "01110000" // /* MW 7 */
+ 14276 "01100000" // /* MW 6 */
+ 14277 "10110011" // /* MW 5 */
+ 14278 "00000011" // /* MW 4 */
+ 14279 "10110000" // /* MW 3 */
+ 14280 "10000111" // /* MW 2 */
+ 14281 "11111101" // /* MW 1 */
+ 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14283 "00000000" // /* MW 1 */
+ 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14285 "00000000" // /* MW 1 */
+ 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+.src_ref 7 "superkernels.cpp" 583 16
+ 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */
+ 14289 "00000001" // /* MW 5 */
+ 14290 "01000000" // /* MW 4 */
+ 14291 "10110000" // /* MW 3 */
+ 14292 "00011100" // /* MW 2 */
+ 14293 "10000000" // /* MW 1 */
+.delay_slot
+ 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011101" // /* MW 3 */
+ 14296 "11101000" // /* MW 2 */
+ 14297 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 22 first
+.delay_slot
+ 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "10010000" // /* MW 3 */
+ 14300 "01100010" // /* MW 2 */
+ 14301 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 30
+.delay_slot
+ 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "11111011" // /* MW 3 */
+ 14304 "01100011" // /* MW 2 */
+ 14305 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14307 "10100000" // /* MW 5 */
+ 14308 "11001000" // /* MW 4 */
+ 14309 "11001100" // /* MW 3 */
+ 14310 "00000111" // /* MW 2 */
+ 14311 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14313 "00110001" // /* MW 3 */
+ 14314 "00000110" // /* MW 2 */
+ 14315 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14317 "00010000" // /* MW 9 */
+ 14318 "00110100" // /* MW 8 */
+ 14319 "00110010" // /* MW 7 */
+ 14320 "11110011" // /* MW 6 */
+ 14321 "00000001" // /* MW 5 */
+ 14322 "00000000" // /* MW 4 */
+ 14323 "00000000" // /* MW 3 */
+ 14324 "00100000" // /* MW 2 */
+ 14325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14327 "00010000" // /* MW 11 */
+ 14328 "00110010" // /* MW 10 */
+ 14329 "00110010" // /* MW 9 */
+ 14330 "11110000" // /* MW 8 */
+ 14331 "00000001" // /* MW 7 */
+ 14332 "00000000" // /* MW 6 */
+ 14333 "10001011" // /* MW 5 */
+ 14334 "10000100" // /* MW 4 */
+ 14335 "11100110" // /* MW 3 */
+ 14336 "11000000" // /* MW 2 */
+ 14337 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 587 4
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14339 "00010000" // /* MW 9 */
+ 14340 "00000000" // /* MW 8 */
+ 14341 "10110011" // /* MW 7 */
+ 14342 "11110000" // /* MW 6 */
+ 14343 "00000001" // /* MW 5 */
+ 14344 "00000000" // /* MW 4 */
+ 14345 "00000000" // /* MW 3 */
+ 14346 "00000001" // /* MW 2 */
+ 14347 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 14349 "00000001" // /* MW 5 */
+ 14350 "00000000" // /* MW 4 */
+ 14351 "01100000" // /* MW 3 */
+ 14352 "00000101" // /* MW 2 */
+ 14353 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14357 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00110001" // /* MW 3 */
+ 14360 "00100000" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14363 "00001010" // /* MW 5 */
+ 14364 "01000000" // /* MW 4 */
+ 14365 "11110000" // /* MW 3 */
+ 14366 "00101100" // /* MW 2 */
+ 14367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14369 "00000000" // /* MW 15 */
+ 14370 "00000000" // /* MW 14 */
+ 14371 "01111000" // /* MW 13 */
+ 14372 "01100000" // /* MW 12 */
+ 14373 "00110111" // /* MW 11 */
+ 14374 "00000000" // /* MW 10 */
+ 14375 "00000000" // /* MW 9 */
+ 14376 "10000000" // /* MW 8 */
+ 14377 "00010001" // /* MW 7 */
+ 14378 "00000110" // /* MW 6 */
+ 14379 "00100000" // /* MW 5 */
+ 14380 "00000000" // /* MW 4 */
+ 14381 "11110000" // /* MW 3 */
+ 14382 "00101100" // /* MW 2 */
+ 14383 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 591 4
+.return_address
+ 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14385 "00000001" // /* MW 5 */
+ 14386 "00000001" // /* MW 4 */
+ 14387 "10100001" // /* MW 3 */
+ 14388 "00000000" // /* MW 2 */
+ 14389 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35 first
+ 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14391 "01010110" // /* MW 3 */
+ 14392 "00000010" // /* MW 2 */
+ 14393 "00000111" // /* MW 1 */
+ 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14395 "00000000" // /* MW 1 */
+ 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14397 "00000000" // /* MW 1 */
+ 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14399 "00000000" // /* MW 1 */
+ 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14401 "00000000" // /* MW 1 */
+ 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14403 "00000000" // /* MW 1 */
+ 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14405 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14407 "00000111" // /* MW 3 */
+ 14408 "10100001" // /* MW 2 */
+ 14409 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4
+ 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */
+ 14411 "00000001" // /* MW 5 */
+ 14412 "01000000" // /* MW 4 */
+ 14413 "01101000" // /* MW 3 */
+ 14414 "00011100" // /* MW 2 */
+ 14415 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105
+.delay_slot
+ 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14417 "11000000" // /* MW 3 */
+ 14418 "01011110" // /* MW 2 */
+ 14419 "00011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105 first
+.delay_slot
+ 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14421 "10010000" // /* MW 3 */
+ 14422 "11001000" // /* MW 2 */
+ 14423 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14427 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14429 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */
+ 14431 "00000001" // /* MW 5 */
+ 14432 "01000000" // /* MW 4 */
+ 14433 "01011000" // /* MW 3 */
+ 14434 "00011100" // /* MW 2 */
+ 14435 "10010000" // /* MW 1 */
+.delay_slot
+ 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14437 "00000000" // /* MW 5 */
+ 14438 "00101100" // /* MW 4 */
+ 14439 "11001000" // /* MW 3 */
+ 14440 "00000111" // /* MW 2 */
+ 14441 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27
+.delay_slot
+ 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14443 "00000001" // /* MW 3 */
+ 14444 "00100010" // /* MW 2 */
+ 14445 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14451 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8 first
+.no_stack_arguments
+ 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */
+ 14453 "01000001" // /* MW 9 */
+ 14454 "00000000" // /* MW 8 */
+ 14455 "00000000" // /* MW 7 */
+ 14456 "01110000" // /* MW 6 */
+ 14457 "00000101" // /* MW 5 */
+ 14458 "00000000" // /* MW 4 */
+ 14459 "10110000" // /* MW 3 */
+ 14460 "11100011" // /* MW 2 */
+ 14461 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38
+.delay_slot
+ 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14463 "10000000" // /* MW 5 */
+ 14464 "11001010" // /* MW 4 */
+ 14465 "11001100" // /* MW 3 */
+ 14466 "00000111" // /* MW 2 */
+ 14467 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14469 "10000000" // /* MW 5 */
+ 14470 "11001010" // /* MW 4 */
+ 14471 "11000000" // /* MW 3 */
+ 14472 "00000111" // /* MW 2 */
+ 14473 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14475 "10000000" // /* MW 3 */
+ 14476 "01100001" // /* MW 2 */
+ 14477 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14481 "00000000" // /* MW 15 */
+ 14482 "00000000" // /* MW 14 */
+ 14483 "01111000" // /* MW 13 */
+ 14484 "10100101" // /* MW 12 */
+ 14485 "00000001" // /* MW 11 */
+ 14486 "00000000" // /* MW 10 */
+ 14487 "00000000" // /* MW 9 */
+ 14488 "00000000" // /* MW 8 */
+ 14489 "01011011" // /* MW 7 */
+ 14490 "00000001" // /* MW 6 */
+ 14491 "00100000" // /* MW 5 */
+ 14492 "00000000" // /* MW 4 */
+ 14493 "11110000" // /* MW 3 */
+ 14494 "00101100" // /* MW 2 */
+ 14495 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38 first
+.return_address
+ 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14497 "00010000" // /* MW 9 */
+ 14498 "00000000" // /* MW 8 */
+ 14499 "00001011" // /* MW 7 */
+ 14500 "11110010" // /* MW 6 */
+ 14501 "00000001" // /* MW 5 */
+ 14502 "00000000" // /* MW 4 */
+ 14503 "11010000" // /* MW 3 */
+ 14504 "11000110" // /* MW 2 */
+ 14505 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14507 "00100000" // /* MW 5 */
+ 14508 "00000000" // /* MW 4 */
+ 14509 "00100000" // /* MW 3 */
+ 14510 "11100011" // /* MW 2 */
+ 14511 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14513 "00000000" // /* MW 1 */
+ 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */
+ 14515 "00000000" // /* MW 5 */
+ 14516 "00000000" // /* MW 4 */
+ 14517 "10000000" // /* MW 3 */
+ 14518 "00011100" // /* MW 2 */
+ 14519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14527 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14529 "00000000" // /* MW 15 */
+ 14530 "00000000" // /* MW 14 */
+ 14531 "01111000" // /* MW 13 */
+ 14532 "01100000" // /* MW 12 */
+ 14533 "10110110" // /* MW 11 */
+ 14534 "00000000" // /* MW 10 */
+ 14535 "00000000" // /* MW 9 */
+ 14536 "00000000" // /* MW 8 */
+ 14537 "01011011" // /* MW 7 */
+ 14538 "00000001" // /* MW 6 */
+ 14539 "00100000" // /* MW 5 */
+ 14540 "00000000" // /* MW 4 */
+ 14541 "11110000" // /* MW 3 */
+ 14542 "00101100" // /* MW 2 */
+ 14543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.src_ref 7 "superkernels.cpp" 599 8 first
+.no_stack_arguments
+ 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 14545 "01000001" // /* MW 9 */
+ 14546 "00000000" // /* MW 8 */
+ 14547 "00000000" // /* MW 7 */
+ 14548 "10000100" // /* MW 6 */
+ 14549 "00000101" // /* MW 5 */
+ 14550 "00000000" // /* MW 4 */
+ 14551 "10110000" // /* MW 3 */
+ 14552 "11100011" // /* MW 2 */
+ 14553 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38
+.delay_slot
+ 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14555 "00000000" // /* MW 5 */
+ 14556 "11001011" // /* MW 4 */
+ 14557 "11001100" // /* MW 3 */
+ 14558 "00000111" // /* MW 2 */
+ 14559 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14561 "00000000" // /* MW 5 */
+ 14562 "11001011" // /* MW 4 */
+ 14563 "11000000" // /* MW 3 */
+ 14564 "00000111" // /* MW 2 */
+ 14565 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14567 "10000000" // /* MW 3 */
+ 14568 "01100001" // /* MW 2 */
+ 14569 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14573 "01100111" // /* MW 3 */
+ 14574 "00000001" // /* MW 2 */
+ 14575 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38 first
+.return_address
+ 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14577 "00010000" // /* MW 9 */
+ 14578 "00000000" // /* MW 8 */
+ 14579 "00001011" // /* MW 7 */
+ 14580 "11110010" // /* MW 6 */
+ 14581 "00000001" // /* MW 5 */
+ 14582 "00000000" // /* MW 4 */
+ 14583 "11010000" // /* MW 3 */
+ 14584 "11000110" // /* MW 2 */
+ 14585 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14587 "00100000" // /* MW 5 */
+ 14588 "00000000" // /* MW 4 */
+ 14589 "00100000" // /* MW 3 */
+ 14590 "10010011" // /* MW 2 */
+ 14591 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14593 "00000101" // /* MW 3 */
+ 14594 "01101000" // /* MW 2 */
+ 14595 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 35 first
+.src_ref 7 "superkernels.cpp" 611 18
+ 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14597 "00010000" // /* MW 9 */
+ 14598 "00101000" // /* MW 8 */
+ 14599 "00110010" // /* MW 7 */
+ 14600 "11110011" // /* MW 6 */
+ 14601 "00000001" // /* MW 5 */
+ 14602 "00000000" // /* MW 4 */
+ 14603 "01010000" // /* MW 3 */
+ 14604 "11001101" // /* MW 2 */
+ 14605 "01101111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 18 first
+ 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14607 "01010110" // /* MW 3 */
+ 14608 "00000110" // /* MW 2 */
+ 14609 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 37 first
+ 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14611 "10111010" // /* MW 3 */
+ 14612 "00011110" // /* MW 2 */
+ 14613 "00000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 73
+ 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14615 "00011010" // /* MW 3 */
+ 14616 "00000110" // /* MW 2 */
+ 14617 "00000011" // /* MW 1 */
+ 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14619 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 110
+ 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14621 "10011010" // /* MW 3 */
+ 14622 "00010110" // /* MW 2 */
+ 14623 "00000011" // /* MW 1 */
+ 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14625 "00000000" // /* MW 1 */
+ 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14627 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14629 "10001000" // /* MW 5 */
+ 14630 "11001000" // /* MW 4 */
+ 14631 "11000000" // /* MW 3 */
+ 14632 "00000111" // /* MW 2 */
+ 14633 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 57
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14635 "01011111" // /* MW 3 */
+ 14636 "11100111" // /* MW 2 */
+ 14637 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19 first
+.src_ref 7 "superkernels.cpp" 611 16
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14639 "00010001" // /* MW 9 */
+ 14640 "00101110" // /* MW 8 */
+ 14641 "00110010" // /* MW 7 */
+ 14642 "11110001" // /* MW 6 */
+ 14643 "00000001" // /* MW 5 */
+ 14644 "00000000" // /* MW 4 */
+ 14645 "00110000" // /* MW 3 */
+ 14646 "11001110" // /* MW 2 */
+ 14647 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 94 first
+ 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14649 "00001111" // /* MW 3 */
+ 14650 "11100001" // /* MW 2 */
+ 14651 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27 first
+ 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14653 "00101111" // /* MW 3 */
+ 14654 "01100011" // /* MW 2 */
+ 14655 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 28 first
+ 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14657 "00001111" // /* MW 3 */
+ 14658 "00100001" // /* MW 2 */
+ 14659 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13
+.src_ref 7 "superkernels.cpp" 611 16 first
+ 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14661 "00010000" // /* MW 11 */
+ 14662 "00110000" // /* MW 10 */
+ 14663 "00110010" // /* MW 9 */
+ 14664 "11110011" // /* MW 8 */
+ 14665 "00000001" // /* MW 7 */
+ 14666 "10000000" // /* MW 6 */
+ 14667 "00110001" // /* MW 5 */
+ 14668 "00000110" // /* MW 4 */
+ 14669 "11110010" // /* MW 3 */
+ 14670 "00101100" // /* MW 2 */
+ 14671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13 first
+ 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14673 "00000000" // /* MW 15 */
+ 14674 "00000000" // /* MW 14 */
+ 14675 "01111000" // /* MW 13 */
+ 14676 "10100101" // /* MW 12 */
+ 14677 "00000001" // /* MW 11 */
+ 14678 "00000000" // /* MW 10 */
+ 14679 "00000000" // /* MW 9 */
+ 14680 "10000000" // /* MW 8 */
+ 14681 "00010001" // /* MW 7 */
+ 14682 "00000110" // /* MW 6 */
+ 14683 "00100110" // /* MW 5 */
+ 14684 "00000000" // /* MW 4 */
+ 14685 "11110000" // /* MW 3 */
+ 14686 "00101100" // /* MW 2 */
+ 14687 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+.src_ref 7 "superkernels.cpp" 614 12
+ 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14689 "10010000" // /* MW 5 */
+ 14690 "11001000" // /* MW 4 */
+ 14691 "11000000" // /* MW 3 */
+ 14692 "00000111" // /* MW 2 */
+ 14693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11
+ 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14695 "00010000" // /* MW 9 */
+ 14696 "00100000" // /* MW 8 */
+ 14697 "00110010" // /* MW 7 */
+ 14698 "11110001" // /* MW 6 */
+ 14699 "00000001" // /* MW 5 */
+ 14700 "00000000" // /* MW 4 */
+ 14701 "11010000" // /* MW 3 */
+ 14702 "11000010" // /* MW 2 */
+ 14703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13
+.src_ref 7 "superkernels.cpp" 616 11 first
+ 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14705 "00010000" // /* MW 9 */
+ 14706 "00100110" // /* MW 8 */
+ 14707 "00110010" // /* MW 7 */
+ 14708 "11110011" // /* MW 6 */
+ 14709 "00000001" // /* MW 5 */
+ 14710 "00000000" // /* MW 4 */
+ 14711 "11010000" // /* MW 3 */
+ 14712 "11000110" // /* MW 2 */
+ 14713 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+ 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14715 "01010110" // /* MW 3 */
+ 14716 "00000110" // /* MW 2 */
+ 14717 "00000110" // /* MW 1 */
+ 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14719 "00000000" // /* MW 1 */
+ 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14721 "00000000" // /* MW 1 */
+ 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14723 "00000000" // /* MW 1 */
+ 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14725 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 618 6 first
+.src_ref 7 "superkernels.cpp" 618 17 first
+ 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */
+ 14727 "00000001" // /* MW 5 */
+ 14728 "01000000" // /* MW 4 */
+ 14729 "11111000" // /* MW 3 */
+ 14730 "00011100" // /* MW 2 */
+ 14731 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14733 "00000001" // /* MW 5 */
+ 14734 "10110000" // /* MW 4 */
+ 14735 "11101001" // /* MW 3 */
+ 14736 "01000000" // /* MW 2 */
+ 14737 "10001100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14739 "00000111" // /* MW 3 */
+ 14740 "10100100" // /* MW 2 */
+ 14741 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14743 "00110001" // /* MW 3 */
+ 14744 "00000110" // /* MW 2 */
+ 14745 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14747 "01010001" // /* MW 3 */
+ 14748 "00000110" // /* MW 2 */
+ 14749 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.delay_slot
+ 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14751 "01110001" // /* MW 3 */
+ 14752 "00000110" // /* MW 2 */
+ 14753 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14755 "00110001" // /* MW 3 */
+ 14756 "11110110" // /* MW 2 */
+ 14757 "00000111" // /* MW 1 */
+ 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14759 "00000000" // /* MW 1 */
+ 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14761 "00000000" // /* MW 1 */
+ 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14763 "00000000" // /* MW 1 */
+ 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14765 "00000000" // /* MW 1 */
+ 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14767 "00000000" // /* MW 1 */
+ 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14771 "10000110" // /* MW 3 */
+ 14772 "01101000" // /* MW 2 */
+ 14773 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14775 "01110110" // /* MW 3 */
+ 14776 "11111111" // /* MW 2 */
+ 14777 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14779 "00110110" // /* MW 3 */
+ 14780 "11111110" // /* MW 2 */
+ 14781 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14783 "01010110" // /* MW 3 */
+ 14784 "11111110" // /* MW 2 */
+ 14785 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14787 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14789 "00110110" // /* MW 3 */
+ 14790 "01000110" // /* MW 2 */
+ 14791 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14793 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14795 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14797 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14799 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14801 "00010010" // /* MW 3 */
+ 14802 "10100011" // /* MW 2 */
+ 14803 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.src_ref 1 "io_buffer_main.h" 395 8
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14805 "11111010" // /* MW 5 */
+ 14806 "11000001" // /* MW 4 */
+ 14807 "00111111" // /* MW 3 */
+ 14808 "11000110" // /* MW 2 */
+ 14809 "11000000" // /* MW 1 */
+ 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14811 "00000000" // /* MW 1 */
+ 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14813 "00000000" // /* MW 1 */
+ 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14815 "00000000" // /* MW 1 */
+ 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14817 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 14819 "01100000" // /* MW 13 */
+ 14820 "00101011" // /* MW 12 */
+ 14821 "00000000" // /* MW 11 */
+ 14822 "10101111" // /* MW 10 */
+ 14823 "00110100" // /* MW 9 */
+ 14824 "00000000" // /* MW 8 */
+ 14825 "00001000" // /* MW 7 */
+ 14826 "01010011" // /* MW 6 */
+ 14827 "00100100" // /* MW 5 */
+ 14828 "00000000" // /* MW 4 */
+ 14829 "11110000" // /* MW 3 */
+ 14830 "00101100" // /* MW 2 */
+ 14831 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14833 "00000000" // /* MW 1 */
+ 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14835 "00000000" // /* MW 1 */
+ 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14837 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 1 "io_buffer_main.h" 125 25
+ 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14839 "00011001" // /* MW 3 */
+ 14840 "11110101" // /* MW 2 */
+ 14841 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14843 "00011001" // /* MW 3 */
+ 14844 "11101000" // /* MW 2 */
+ 14845 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2 first
+.no_stack_arguments
+ 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 14847 "00000001" // /* MW 5 */
+ 14848 "00000000" // /* MW 4 */
+ 14849 "10111000" // /* MW 3 */
+ 14850 "00001000" // /* MW 2 */
+ 14851 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14853 "00000000" // /* MW 5 */
+ 14854 "11001100" // /* MW 4 */
+ 14855 "11000110" // /* MW 3 */
+ 14856 "00000111" // /* MW 2 */
+ 14857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14863 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14865 "00000000" // /* MW 15 */
+ 14866 "00000000" // /* MW 14 */
+ 14867 "01111000" // /* MW 13 */
+ 14868 "10100101" // /* MW 12 */
+ 14869 "00000001" // /* MW 11 */
+ 14870 "00000000" // /* MW 10 */
+ 14871 "00000000" // /* MW 9 */
+ 14872 "00000000" // /* MW 8 */
+ 14873 "10001011" // /* MW 7 */
+ 14874 "10001000" // /* MW 6 */
+ 14875 "00100110" // /* MW 5 */
+ 14876 "00000000" // /* MW 4 */
+ 14877 "11110000" // /* MW 3 */
+ 14878 "00101100" // /* MW 2 */
+ 14879 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+.src_ref 1 "io_buffer_main.h" 218 49
+.return_address
+ 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14881 "00010000" // /* MW 9 */
+ 14882 "00100100" // /* MW 8 */
+ 14883 "10110010" // /* MW 7 */
+ 14884 "11110000" // /* MW 6 */
+ 14885 "00000001" // /* MW 5 */
+ 14886 "00000000" // /* MW 4 */
+ 14887 "00100000" // /* MW 3 */
+ 14888 "01000010" // /* MW 2 */
+ 14889 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6 first
+.src_ref 7 "superkernels.cpp" 623 20
+ 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14891 "00010000" // /* MW 9 */
+ 14892 "00100010" // /* MW 8 */
+ 14893 "10110010" // /* MW 7 */
+ 14894 "11110000" // /* MW 6 */
+ 14895 "00000001" // /* MW 5 */
+ 14896 "00000000" // /* MW 4 */
+ 14897 "11010000" // /* MW 3 */
+ 14898 "11000110" // /* MW 2 */
+ 14899 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 20
+ 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14901 "01010110" // /* MW 3 */
+ 14902 "00000110" // /* MW 2 */
+ 14903 "00000001" // /* MW 1 */
+ 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14905 "00000000" // /* MW 1 */
+ 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14907 "00000000" // /* MW 1 */
+ 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14909 "00000000" // /* MW 1 */
+ 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14911 "00000000" // /* MW 1 */
+ 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14913 "00000000" // /* MW 1 */
+ 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14915 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 17
+ 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14917 "00101000" // /* MW 3 */
+ 14918 "01100011" // /* MW 2 */
+ 14919 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+ 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */
+ 14921 "00000001" // /* MW 5 */
+ 14922 "01000000" // /* MW 4 */
+ 14923 "11010000" // /* MW 3 */
+ 14924 "00011101" // /* MW 2 */
+ 14925 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14935 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14937 "00001000" // /* MW 9 */
+ 14938 "00000011" // /* MW 8 */
+ 14939 "10110100" // /* MW 7 */
+ 14940 "11101000" // /* MW 6 */
+ 14941 "00010111" // /* MW 5 */
+ 14942 "00111111" // /* MW 4 */
+ 14943 "10000000" // /* MW 3 */
+ 14944 "00000010" // /* MW 2 */
+ 14945 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14947 "00010000" // /* MW 9 */
+ 14948 "00101110" // /* MW 8 */
+ 14949 "00110010" // /* MW 7 */
+ 14950 "11110000" // /* MW 6 */
+ 14951 "00000001" // /* MW 5 */
+ 14952 "00000000" // /* MW 4 */
+ 14953 "11010000" // /* MW 3 */
+ 14954 "11101110" // /* MW 2 */
+ 14955 "00111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14957 "01010110" // /* MW 3 */
+ 14958 "11111110" // /* MW 2 */
+ 14959 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14961 "01110110" // /* MW 3 */
+ 14962 "11111110" // /* MW 2 */
+ 14963 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14965 "10010110" // /* MW 3 */
+ 14966 "01010110" // /* MW 2 */
+ 14967 "00000001" // /* MW 1 */
+ 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14969 "00000000" // /* MW 1 */
+ 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14971 "00000000" // /* MW 1 */
+ 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14973 "00000000" // /* MW 1 */
+ 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14975 "00000000" // /* MW 1 */
+ 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14977 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14979 "00100010" // /* MW 3 */
+ 14980 "11100101" // /* MW 2 */
+ 14981 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50
+.src_ref 7 "superkernels.cpp" 630 3
+.src_ref 1 "io_buffer_main.h" 218 20
+ 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14983 "00001010" // /* MW 5 */
+ 14984 "01000000" // /* MW 4 */
+ 14985 "00110000" // /* MW 3 */
+ 14986 "11001010" // /* MW 2 */
+ 14987 "00100000" // /* MW 1 */
+ 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14989 "00000000" // /* MW 1 */
+ 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14991 "00000000" // /* MW 1 */
+ 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14993 "00000000" // /* MW 1 */
+ 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14995 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14997 "00011000" // /* MW 3 */
+ 14998 "00010011" // /* MW 2 */
+ 14999 "00010101" // /* MW 1 */
+ 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15001 "00000000" // /* MW 1 */
+ 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15003 "00000000" // /* MW 1 */
+ 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15005 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52 first
+ 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15007 "01110110" // /* MW 3 */
+ 15008 "00000110" // /* MW 2 */
+ 15009 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34 first
+ 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15011 "01010110" // /* MW 3 */
+ 15012 "00000010" // /* MW 2 */
+ 15013 "00000111" // /* MW 1 */
+ 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15015 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15017 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15019 "00011110" // /* MW 3 */
+ 15020 "01011100" // /* MW 2 */
+ 15021 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15023 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15025 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 32
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15027 "01111000" // /* MW 9 */
+ 15028 "01100000" // /* MW 8 */
+ 15029 "00110001" // /* MW 7 */
+ 15030 "01101100" // /* MW 6 */
+ 15031 "00111000" // /* MW 5 */
+ 15032 "00100111" // /* MW 4 */
+ 15033 "11010000" // /* MW 3 */
+ 15034 "11000110" // /* MW 2 */
+ 15035 "00101001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15037 "00000111" // /* MW 3 */
+ 15038 "10100001" // /* MW 2 */
+ 15039 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */
+ 15041 "00000001" // /* MW 5 */
+ 15042 "01000000" // /* MW 4 */
+ 15043 "10001000" // /* MW 3 */
+ 15044 "00011101" // /* MW 2 */
+ 15045 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15047 "10001011" // /* MW 3 */
+ 15048 "10000000" // /* MW 2 */
+ 15049 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15051 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15055 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.delay_slot
+ 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 15057 "10100000" // /* MW 7 */
+ 15058 "11100010" // /* MW 6 */
+ 15059 "10110100" // /* MW 5 */
+ 15060 "00000000" // /* MW 4 */
+ 15061 "10110000" // /* MW 3 */
+ 15062 "00010011" // /* MW 2 */
+ 15063 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+ 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */
+ 15065 "00000001" // /* MW 5 */
+ 15066 "01000000" // /* MW 4 */
+ 15067 "10011000" // /* MW 3 */
+ 15068 "00011101" // /* MW 2 */
+ 15069 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15079 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8 first
+.no_stack_arguments
+ 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */
+ 15081 "00000001" // /* MW 5 */
+ 15082 "00000000" // /* MW 4 */
+ 15083 "11111000" // /* MW 3 */
+ 15084 "00010101" // /* MW 2 */
+ 15085 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15087 "10000000" // /* MW 5 */
+ 15088 "11001010" // /* MW 4 */
+ 15089 "11000110" // /* MW 3 */
+ 15090 "00000111" // /* MW 2 */
+ 15091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15095 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15097 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15099 "10000001" // /* MW 5 */
+ 15100 "11000001" // /* MW 4 */
+ 15101 "11110100" // /* MW 3 */
+ 15102 "00101100" // /* MW 2 */
+ 15103 "00000000" // /* MW 1 */
+.return_address
+ 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */
+ 15105 "00000000" // /* MW 5 */
+ 15106 "00000000" // /* MW 4 */
+ 15107 "10011000" // /* MW 3 */
+ 15108 "00011101" // /* MW 2 */
+ 15109 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15111 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15119 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.src_ref 7 "superkernels.cpp" 637 8 first
+.no_stack_arguments
+ 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 15121 "00000001" // /* MW 5 */
+ 15122 "00000000" // /* MW 4 */
+ 15123 "01011000" // /* MW 3 */
+ 15124 "00010110" // /* MW 2 */
+ 15125 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15127 "00000000" // /* MW 5 */
+ 15128 "11001011" // /* MW 4 */
+ 15129 "11000110" // /* MW 3 */
+ 15130 "00000111" // /* MW 2 */
+ 15131 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15133 "11000000" // /* MW 3 */
+ 15134 "01100000" // /* MW 2 */
+ 15135 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 15141 "10000001" // /* MW 11 */
+ 15142 "10101101" // /* MW 10 */
+ 15143 "00000000" // /* MW 9 */
+ 15144 "00000000" // /* MW 8 */
+ 15145 "00000000" // /* MW 7 */
+ 15146 "00000000" // /* MW 6 */
+ 15147 "00100000" // /* MW 5 */
+ 15148 "00000000" // /* MW 4 */
+ 15149 "11110000" // /* MW 3 */
+ 15150 "00101100" // /* MW 2 */
+ 15151 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.return_address
+ 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15153 "10011001" // /* MW 3 */
+ 15154 "11110000" // /* MW 2 */
+ 15155 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15157 "00001010" // /* MW 5 */
+ 15158 "01000100" // /* MW 4 */
+ 15159 "00100000" // /* MW 3 */
+ 15160 "10000011" // /* MW 2 */
+ 15161 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 1 "io_buffer_main.h" 324 32 first
+ 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15163 "00010000" // /* MW 9 */
+ 15164 "00100100" // /* MW 8 */
+ 15165 "10110010" // /* MW 7 */
+ 15166 "11110011" // /* MW 6 */
+ 15167 "00000001" // /* MW 5 */
+ 15168 "00000000" // /* MW 4 */
+ 15169 "11010000" // /* MW 3 */
+ 15170 "11000010" // /* MW 2 */
+ 15171 "11101000" // /* MW 1 */
+ 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15173 "00000000" // /* MW 1 */
+ 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15175 "00000000" // /* MW 1 */
+ 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15177 "00000000" // /* MW 1 */
+ 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15179 "00000000" // /* MW 1 */
+ 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15181 "00000000" // /* MW 1 */
+ 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15183 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15185 "00011000" // /* MW 3 */
+ 15186 "00010001" // /* MW 2 */
+ 15187 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15189 "01010110" // /* MW 3 */
+ 15190 "11110110" // /* MW 2 */
+ 15191 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15193 "00010110" // /* MW 3 */
+ 15194 "01010110" // /* MW 2 */
+ 15195 "00000000" // /* MW 1 */
+ 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15197 "00000000" // /* MW 1 */
+ 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15199 "00000000" // /* MW 1 */
+ 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15201 "00000000" // /* MW 1 */
+ 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15203 "00000000" // /* MW 1 */
+ 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15207 "00100001" // /* MW 3 */
+ 15208 "01100101" // /* MW 2 */
+ 15209 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15211 "01010001" // /* MW 3 */
+ 15212 "11110110" // /* MW 2 */
+ 15213 "00001001" // /* MW 1 */
+ 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15215 "00000000" // /* MW 1 */
+ 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15217 "00000000" // /* MW 1 */
+ 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15219 "00000000" // /* MW 1 */
+ 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15221 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15223 "00011000" // /* MW 3 */
+ 15224 "00010001" // /* MW 2 */
+ 15225 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15227 "01010110" // /* MW 3 */
+ 15228 "11100110" // /* MW 2 */
+ 15229 "00000110" // /* MW 1 */
+ 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15231 "00000000" // /* MW 1 */
+ 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15233 "00000000" // /* MW 1 */
+ 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */
+ 15235 "00000000" // /* MW 5 */
+ 15236 "00000000" // /* MW 4 */
+ 15237 "11011000" // /* MW 3 */
+ 15238 "00011101" // /* MW 2 */
+ 15239 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15241 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15243 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 7 "superkernels.cpp" 649 14
+.delay_slot
+ 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15245 "00000001" // /* MW 3 */
+ 15246 "00100000" // /* MW 2 */
+ 15247 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15249 "01000011" // /* MW 5 */
+ 15250 "11000110" // /* MW 4 */
+ 15251 "00111000" // /* MW 3 */
+ 15252 "11000010" // /* MW 2 */
+ 15253 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28 first
+.delay_slot
+ 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15255 "00000000" // /* MW 9 */
+ 15256 "00000000" // /* MW 8 */
+ 15257 "00000000" // /* MW 7 */
+ 15258 "10000000" // /* MW 6 */
+ 15259 "00110001" // /* MW 5 */
+ 15260 "11100110" // /* MW 4 */
+ 15261 "11110110" // /* MW 3 */
+ 15262 "00101100" // /* MW 2 */
+ 15263 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+.src_ref 7 "superkernels.cpp" 649 14
+ 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 15265 "00000000" // /* MW 15 */
+ 15266 "00000000" // /* MW 14 */
+ 15267 "01111000" // /* MW 13 */
+ 15268 "10100101" // /* MW 12 */
+ 15269 "00000001" // /* MW 11 */
+ 15270 "00001000" // /* MW 10 */
+ 15271 "00000000" // /* MW 9 */
+ 15272 "00000001" // /* MW 8 */
+ 15273 "01011011" // /* MW 7 */
+ 15274 "00000001" // /* MW 6 */
+ 15275 "00100000" // /* MW 5 */
+ 15276 "00000000" // /* MW 4 */
+ 15277 "11110000" // /* MW 3 */
+ 15278 "00101100" // /* MW 2 */
+ 15279 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+.src_ref 7 "superkernels.cpp" 648 19
+.src_ref 7 "superkernels.cpp" 651
+ 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15281 "00010000" // /* MW 9 */
+ 15282 "00110000" // /* MW 8 */
+ 15283 "10110010" // /* MW 7 */
+ 15284 "11110011" // /* MW 6 */
+ 15285 "00000001" // /* MW 5 */
+ 15286 "00000000" // /* MW 4 */
+ 15287 "00100000" // /* MW 3 */
+ 15288 "10000111" // /* MW 2 */
+ 15289 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+.src_ref 7 "superkernels.cpp" 648 19 first
+.src_ref 7 "superkernels.cpp" 649 14
+ 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15291 "00010000" // /* MW 9 */
+ 15292 "00100000" // /* MW 8 */
+ 15293 "00110010" // /* MW 7 */
+ 15294 "11110011" // /* MW 6 */
+ 15295 "00000001" // /* MW 5 */
+ 15296 "00000000" // /* MW 4 */
+ 15297 "11010000" // /* MW 3 */
+ 15298 "11001010" // /* MW 2 */
+ 15299 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15301 "00110110" // /* MW 3 */
+ 15302 "00000110" // /* MW 2 */
+ 15303 "00000110" // /* MW 1 */
+ 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15305 "00000000" // /* MW 1 */
+ 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15307 "00000000" // /* MW 1 */
+ 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15309 "00000000" // /* MW 1 */
+ 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15311 "00000000" // /* MW 1 */
+ 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15313 "00000000" // /* MW 1 */
+ 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15315 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 16
+ 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15317 "00101000" // /* MW 3 */
+ 15318 "01100011" // /* MW 2 */
+ 15319 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */
+ 15321 "00000001" // /* MW 5 */
+ 15322 "01000000" // /* MW 4 */
+ 15323 "11111000" // /* MW 3 */
+ 15324 "00011101" // /* MW 2 */
+ 15325 "10001000" // /* MW 1 */
+.delay_slot
+ 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15327 "10011001" // /* MW 3 */
+ 15328 "11111011" // /* MW 2 */
+ 15329 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15331 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15333 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15335 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15337 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 649 14 first
+ 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15339 "00100011" // /* MW 5 */
+ 15340 "00001100" // /* MW 4 */
+ 15341 "11111100" // /* MW 3 */
+ 15342 "00101100" // /* MW 2 */
+ 15343 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15345 "00011001" // /* MW 3 */
+ 15346 "11111111" // /* MW 2 */
+ 15347 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651 first
+ 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15349 "00000000" // /* MW 3 */
+ 15350 "00101000" // /* MW 2 */
+ 15351 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651
+.delay_slot
+ 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15353 "00000001" // /* MW 5 */
+ 15354 "00000000" // /* MW 4 */
+ 15355 "00000000" // /* MW 3 */
+ 15356 "11111000" // /* MW 2 */
+ 15357 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+ 15365 "00000000" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 21 first
+.src_ref 0 "0_0_reloadable5.cc" 23 79
+.function_start
+ 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15377 "11000000" // /* MW 3 */
+ 15378 "01100000" // /* MW 2 */
+ 15379 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 23 79 first
+ 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15381 "00011110" // /* MW 3 */
+ 15382 "00011100" // /* MW 2 */
+ 15383 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 24 79 first
+ 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15385 "10011110" // /* MW 3 */
+ 15386 "00101100" // /* MW 2 */
+ 15387 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 26 81 first
+ 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15389 "10011110" // /* MW 3 */
+ 15390 "11110101" // /* MW 2 */
+ 15391 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 25 47 first
+ 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15393 "00011110" // /* MW 3 */
+ 15394 "00000101" // /* MW 2 */
+ 15395 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 22 4 first
+.tail_call
+ 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 15397 "00000000" // /* MW 5 */
+ 15398 "00000000" // /* MW 4 */
+ 15399 "01110000" // /* MW 3 */
+ 15400 "00001101" // /* MW 2 */
+ 15401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 15411 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 30 first
+.src_ref 0 "0_0_reloadable5.cc" 32 79
+.function_start
+ 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15425 "11000000" // /* MW 3 */
+ 15426 "01100000" // /* MW 2 */
+ 15427 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 32 79 first
+ 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15429 "00011110" // /* MW 3 */
+ 15430 "00101100" // /* MW 2 */
+ 15431 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 34 81 first
+ 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15433 "00011110" // /* MW 3 */
+ 15434 "11110101" // /* MW 2 */
+ 15435 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 33 47 first
+ 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15437 "10011110" // /* MW 3 */
+ 15438 "00000100" // /* MW 2 */
+ 15439 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 31 4 first
+.tail_call
+ 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 15441 "00000000" // /* MW 5 */
+ 15442 "00000000" // /* MW 4 */
+ 15443 "00011000" // /* MW 3 */
+ 15444 "00010000" // /* MW 2 */
+ 15445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 15455 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 38 first
+.src_ref 0 "0_0_reloadable5.cc" 40 79
+.function_start
+ 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15457 "11000000" // /* MW 3 */
+ 15458 "01100000" // /* MW 2 */
+ 15459 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 40 79 first
+ 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15461 "00011110" // /* MW 3 */
+ 15462 "00101100" // /* MW 2 */
+ 15463 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 42 81 first
+ 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15465 "00011110" // /* MW 3 */
+ 15466 "11110101" // /* MW 2 */
+ 15467 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 41 47 first
+ 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15469 "10011110" // /* MW 3 */
+ 15470 "00000100" // /* MW 2 */
+ 15471 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 39 4 first
+.tail_call
+ 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 15473 "00000000" // /* MW 5 */
+ 15474 "00000000" // /* MW 4 */
+ 15475 "11001000" // /* MW 3 */
+ 15476 "00010001" // /* MW 2 */
+ 15477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 15487 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 46 first
+.src_ref 0 "0_0_reloadable5.cc" 48 79
+.function_start
+ 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15489 "11000000" // /* MW 3 */
+ 15490 "01100000" // /* MW 2 */
+ 15491 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 48 79 first
+ 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15493 "00011110" // /* MW 3 */
+ 15494 "00101100" // /* MW 2 */
+ 15495 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 50 81 first
+ 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15497 "00011110" // /* MW 3 */
+ 15498 "11110101" // /* MW 2 */
+ 15499 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 49 47 first
+ 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15501 "10011110" // /* MW 3 */
+ 15502 "00000100" // /* MW 2 */
+ 15503 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 47 4 first
+.tail_call
+ 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 15505 "00000000" // /* MW 5 */
+ 15506 "00000000" // /* MW 4 */
+ 15507 "10001000" // /* MW 3 */
+ 15508 "00010100" // /* MW 2 */
+ 15509 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 15519 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 54 first
+.src_ref 0 "0_0_reloadable5.cc" 56 79
+.function_start
+ 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15521 "11000000" // /* MW 3 */
+ 15522 "01100000" // /* MW 2 */
+ 15523 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 56 79 first
+ 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15525 "00011110" // /* MW 3 */
+ 15526 "00111100" // /* MW 2 */
+ 15527 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 57 47 first
+ 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15529 "10011110" // /* MW 3 */
+ 15530 "11101100" // /* MW 2 */
+ 15531 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 59 81 first
+ 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15533 "10011110" // /* MW 3 */
+ 15534 "00010101" // /* MW 2 */
+ 15535 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 58 80 first
+ 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15537 "00011110" // /* MW 3 */
+ 15538 "00000101" // /* MW 2 */
+ 15539 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 55 4 first
+.tail_call
+ 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */
+ 15541 "00000000" // /* MW 5 */
+ 15542 "00000000" // /* MW 4 */
+ 15543 "11110000" // /* MW 3 */
+ 15544 "00010110" // /* MW 2 */
+ 15545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15551 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15553 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 15555 "00000000" // /* MW 1 */
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function _b924_wrapper _Z13_b924_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 63 first
+.src_ref 0 "0_0_reloadable5.cc" 65 79
+.function_start
+ 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15569 "11000000" // /* MW 3 */
+ 15570 "01100000" // /* MW 2 */
+ 15571 "00011011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 65 79 first
+ 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15573 "00011110" // /* MW 3 */
+ 15574 "00011100" // /* MW 2 */
+ 15575 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 66 79 first
+ 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15577 "10011110" // /* MW 3 */
+ 15578 "00011100" // /* MW 2 */
+ 15579 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 67 80 first
+ 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15581 "00011110" // /* MW 3 */
+ 15582 "00101101" // /* MW 2 */
+ 15583 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 69 81 first
+ 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15585 "00011110" // /* MW 3 */
+ 15586 "11110110" // /* MW 2 */
+ 15587 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 68 47 first
+ 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15589 "10011110" // /* MW 3 */
+ 15590 "00000101" // /* MW 2 */
+ 15591 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 64 4 first
+.tail_call
+ 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */
+ 15593 "00000000" // /* MW 5 */
+ 15594 "00000000" // /* MW 4 */
+ 15595 "11010000" // /* MW 3 */
+ 15596 "00011011" // /* MW 2 */
+ 15597 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+ 15607 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 73 first
+.src_ref 0 "0_0_reloadable5.cc" 75 79
+.function_start
+ 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15617 "11000000" // /* MW 3 */
+ 15618 "01100000" // /* MW 2 */
+ 15619 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 75 79 first
+ 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15621 "00011110" // /* MW 3 */
+ 15622 "00011100" // /* MW 2 */
+ 15623 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 76 79 first
+ 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15625 "10011110" // /* MW 3 */
+ 15626 "00101100" // /* MW 2 */
+ 15627 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 78 81 first
+ 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15629 "10011110" // /* MW 3 */
+ 15630 "11110101" // /* MW 2 */
+ 15631 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 77 47 first
+ 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15633 "00011110" // /* MW 3 */
+ 15634 "00000101" // /* MW 2 */
+ 15635 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 74 4 first
+.tail_call
+ 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */
+ 15637 "00000000" // /* MW 5 */
+ 15638 "00000000" // /* MW 4 */
+ 15639 "11100000" // /* MW 3 */
+ 15640 "00011010" // /* MW 2 */
+ 15641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 15651 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15665 "01000001" // /* MW 5 */
+ 15666 "10100000" // /* MW 4 */
+ 15667 "00101111" // /* MW 3 */
+ 15668 "11000000" // /* MW 2 */
+ 15669 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15671 "00011100" // /* MW 3 */
+ 15672 "11000110" // /* MW 2 */
+ 15673 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15675 "00011100" // /* MW 3 */
+ 15676 "11000110" // /* MW 2 */
+ 15677 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15679 "00011100" // /* MW 3 */
+ 15680 "11000110" // /* MW 2 */
+ 15681 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15683 "00011100" // /* MW 3 */
+ 15684 "11000110" // /* MW 2 */
+ 15685 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15687 "00011100" // /* MW 3 */
+ 15688 "11000110" // /* MW 2 */
+ 15689 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15691 "00011100" // /* MW 3 */
+ 15692 "11000110" // /* MW 2 */
+ 15693 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15695 "00011100" // /* MW 3 */
+ 15696 "11000110" // /* MW 2 */
+ 15697 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15699 "00011100" // /* MW 3 */
+ 15700 "11000110" // /* MW 2 */
+ 15701 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15703 "00011100" // /* MW 3 */
+ 15704 "11000110" // /* MW 2 */
+ 15705 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15707 "00011100" // /* MW 3 */
+ 15708 "11000110" // /* MW 2 */
+ 15709 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15711 "00011100" // /* MW 3 */
+ 15712 "11000110" // /* MW 2 */
+ 15713 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15715 "00011100" // /* MW 3 */
+ 15716 "11000110" // /* MW 2 */
+ 15717 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15719 "00011100" // /* MW 3 */
+ 15720 "11000110" // /* MW 2 */
+ 15721 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15723 "00011100" // /* MW 3 */
+ 15724 "11000110" // /* MW 2 */
+ 15725 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15727 "00011100" // /* MW 3 */
+ 15728 "11000110" // /* MW 2 */
+ 15729 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15731 "00011100" // /* MW 3 */
+ 15732 "11000110" // /* MW 2 */
+ 15733 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15735 "00011100" // /* MW 3 */
+ 15736 "11000110" // /* MW 2 */
+ 15737 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15739 "00011100" // /* MW 3 */
+ 15740 "11000110" // /* MW 2 */
+ 15741 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15743 "00011100" // /* MW 3 */
+ 15744 "11000110" // /* MW 2 */
+ 15745 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15747 "00011100" // /* MW 3 */
+ 15748 "11000110" // /* MW 2 */
+ 15749 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15751 "00011100" // /* MW 3 */
+ 15752 "11000110" // /* MW 2 */
+ 15753 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15755 "00011100" // /* MW 3 */
+ 15756 "11000110" // /* MW 2 */
+ 15757 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15759 "00011100" // /* MW 3 */
+ 15760 "11000110" // /* MW 2 */
+ 15761 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15763 "00011100" // /* MW 3 */
+ 15764 "11000110" // /* MW 2 */
+ 15765 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15767 "00011100" // /* MW 3 */
+ 15768 "11000110" // /* MW 2 */
+ 15769 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15771 "00011100" // /* MW 3 */
+ 15772 "11000110" // /* MW 2 */
+ 15773 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15775 "00011100" // /* MW 3 */
+ 15776 "11000110" // /* MW 2 */
+ 15777 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15779 "00011100" // /* MW 3 */
+ 15780 "11000110" // /* MW 2 */
+ 15781 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15783 "00000000" // /* MW 3 */
+ 15784 "00101000" // /* MW 2 */
+ 15785 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15787 "00011100" // /* MW 3 */
+ 15788 "11000110" // /* MW 2 */
+ 15789 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15791 "00011100" // /* MW 3 */
+ 15792 "11000110" // /* MW 2 */
+ 15793 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15795 "00011100" // /* MW 3 */
+ 15796 "11000110" // /* MW 2 */
+ 15797 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15799 "00011100" // /* MW 3 */
+ 15800 "11000110" // /* MW 2 */
+ 15801 "00010000" // /* MW 1 */
+.delay_slot
+ 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15803 "10100000" // /* MW 3 */
+ 15804 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 15805 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmico
new file mode 100644
index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.cmico
@@ -0,0 +1 @@
++Mdec
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.lst
new file mode 100644
index 0000000000000000000000000000000000000000..4a0bb9c3b02d8c2df3b5faeb6f4b950508fce7fd
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.lst
@@ -0,0 +1,5518 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+
+.text_segment PM 2352
+.entry_point
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function_start
+ 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1
+ 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15
+ 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2
+ 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15
+ 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16
+ 2390 0x00 0x00 NOPX
+ 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4
+ 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4
+ 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12
+ 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8
+ 2408 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2412 0x00 0x00 NOPX
+ 2414 0x00 0x00 NOPX
+ 2416 0x00 0x00 NOPX
+ 2418 0x00 0x00 NOPX
+ 2420 0x00 0x00 NOPX
+ 2422 0x00 0x00 NOPX
+ 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27
+ 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12]
+ 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 2436 0x00 0x00 NOPX
+ 2438 0x00 0x00 NOPX
+ 2440 0x00 0x00 NOPX
+ 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26
+ 2446 0x10 0x24 0x09 0x18 MOVX r18, #2
+ 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 0x10 0x26 0x05 0x18 MOVX r19, #1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15
+ 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4
+ 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4
+ 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12
+ 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8
+ 2500 0x07 0x07 0x76 0x98 LDA r27, [p7]
+ 2504 0x00 0x00 NOPX
+ 2506 0x00 0x00 NOPX
+ 2508 0x00 0x00 NOPX
+ 2510 0x00 0x00 NOPX
+ 2512 0x00 0x00 NOPX
+ 2514 0x00 0x00 NOPX
+ 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27
+ 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12]
+ 2524 0x00 0x00 NOPX
+ 2526 0x00 0x00 NOPX
+ 2528 0x00 0x00 NOPX
+ 2530 0x00 0x00 NOPX
+ 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26
+ 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18
+ 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17
+ 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056
+ 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24]
+ 2556 0x00 0x00 NOPX
+ 2558 0x00 0x00 NOPX
+ 2560 0x00 0x00 NOPX
+ 2562 0x00 0x00 NOPX
+ 2564 0x00 0x00 NOPX
+ 2566 0x00 0x00 NOPX
+.no_stack_arguments
+ 2568 0x10 0x30 0x40 0x18 JL p1
+.delay_slot
+ 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 2576 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2578 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2580 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM
+.return_address
+ 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1
+ 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24]
+ 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20]
+ 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16]
+ 2610 0x00 0x00 NOPX
+ 2612 0x00 0x00 NOPX
+ 2614 0x00 0x00 NOPX
+ 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16
+ 2620 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 2624 0x00 0x00 NOPX
+ 2626 0x00 0x00 NOPX
+ 2628 0x00 0x00 NOPX
+ 2630 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15
+ 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 2652 0x00 0x00 NOPX
+ 2654 0x00 0x00 NOPX
+ 2656 0x00 0x00 NOPX
+ 2658 0x00 0x00 NOPX
+ 2660 0x00 0x00 NOPX
+ 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17
+ 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16
+ 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4]
+ 2678 0x00 0x00 NOPX
+ 2680 0x00 0x00 NOPX
+ 2682 0x00 0x00 NOPX
+ 2684 0x00 0x00 NOPX
+ 2686 0x00 0x00 NOPX
+ 2688 0x00 0x00 NOPX
+ 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26
+ 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4]
+ 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+ 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8]
+ 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 2716 0x00 0x00 NOPX
+ 2718 0x00 0x00 NOPX
+ 2720 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0
+.delay_slot
+ 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+.delay_slot
+ 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.delay_slot
+ 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4]
+.delay_slot
+.swstall delay_slot
+ 2740 0x00 0x00 NOPX
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+
+.text_segment PM 2752
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function_start
+ 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1
+ 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9
+ 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64
+ 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16
+ 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15
+ 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0
+ 2812 0x00 0x00 NOPX
+ 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2830 0x00 0x00 NOPX
+ 2832 0x00 0x00 NOPX
+ 2834 0x00 0x00 NOPX
+ 2836 0x00 0x00 NOPX
+ 2838 0x00 0x00 NOPX
+ 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4
+ 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4
+ 2856 0x00 0x00 NOPX
+ 2858 0x00 0x00 NOPX
+ 2860 0x00 0x00 NOPX
+ 2862 0x00 0x00 NOPX
+ 2864 0x00 0x00 NOPX
+ 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0]
+ 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4]
+ 2882 0x00 0x00 NOPX
+ 2884 0x00 0x00 NOPX
+ 2886 0x00 0x00 NOPX
+ 2888 0x00 0x00 NOPX
+ 2890 0x00 0x00 NOPX
+ 2892 0x09 0x04 0x09 0x98 ST eh0, [p1]
+ 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4]
+ 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3
+ 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1
+ 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5
+ 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2
+ 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0
+ 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6
+ 2924 0x00 0x00 NOPX
+ 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1
+ 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1
+ 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5
+ 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4
+ 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27
+ 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27
+ 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6
+ 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16
+ 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27
+ 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27
+ 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056
+.delay_slot
+ 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4
+.delay_slot
+ 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20
+.delay_slot
+ 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5
+.delay_slot
+ 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4
+.delay_slot
+ 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27
+ 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056
+.delay_slot
+.swstall delay_slot
+ 3006 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3008 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3010 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3012 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3014 0x00 0x00 NOPX
+ 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104
+.delay_slot
+ 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12
+.delay_slot
+ 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16
+.delay_slot
+ 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3
+.delay_slot
+ 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15
+.delay_slot
+ 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4
+ 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12
+ 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1
+ 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+ 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60
+ 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51
+ 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55
+ 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68
+ 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112
+ 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105
+ 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49
+ 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63
+ 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1
+ 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7
+ 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18
+ 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1
+ 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87
+ 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1
+ 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18
+ 3310 0x00 0x00 NOPX
+ 3312 0x00 0x00 NOPX
+ 3314 0x00 0x00 NOPX
+ 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18
+ 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6
+ 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5
+ 3328 0x00 0x00 NOPX
+ 3330 0x00 0x00 NOPX
+ 3332 0x00 0x00 NOPX
+ 3334 0x00 0x00 NOPX
+ 3336 0x00 0x00 NOPX
+ 3338 0x00 0x00 NOPX
+ 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14
+ 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27
+ 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2
+ 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536
+ 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18
+ 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680
+ 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16
+ 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8
+ 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17
+ 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19
+ 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26
+ 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22
+ 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30
+ 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25
+ 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568
+.delay_slot
+ 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30
+.delay_slot
+ 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21
+.delay_slot
+ 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17
+.delay_slot
+ 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1
+.delay_slot
+ 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27
+ 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32]
+ 3478 0x00 0x00 NOPX
+ 3480 0x00 0x00 NOPX
+ 3482 0x00 0x00 NOPX
+ 3484 0x00 0x00 NOPX
+ 3486 0x00 0x00 NOPX
+ 3488 0x00 0x00 NOPX
+ 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568
+.delay_slot
+.swstall delay_slot
+ 3496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3500 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3502 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 3504 0x00 0x00 NOPX
+ 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64
+ 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27
+ 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6
+ 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0
+ 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27
+ 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30
+ 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28
+ 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27
+ 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64
+ 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0
+ 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18
+ 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6
+ 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31
+ 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18
+ 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144
+ 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27
+ 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288
+ 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26
+ 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27
+ 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0
+ 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24
+ 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28
+ 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256
+ 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27
+ 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542
+ 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1
+ 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1
+ 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0
+ 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20
+ 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1
+ 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0
+ 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0
+ 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0
+ 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14
+ 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26
+ 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56
+ 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7
+ 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3
+ 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18
+ 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4
+ 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6
+ 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3
+ 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0
+ 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0
+ 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1
+ 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3
+ 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1
+ 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26
+ 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr
+.delay_slot
+ 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20
+.delay_slot
+ 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4
+.delay_slot
+ 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4
+.delay_slot
+ 4168 0x0a 0x04 0x51 0x98 ST r2, [p2]
+.delay_slot
+ 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+
+.text_segment PM 4192
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function_start
+ 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2
+ 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4
+ 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032
+ 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336
+ 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384
+ 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0
+ 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 4258 0x00 0x00 NOPX
+ 4260 0x00 0x00 NOPX
+ 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1
+ 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4400 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0
+ 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1
+ 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0
+ 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+ 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr
+.delay_slot
+ 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26]
+.delay_slot
+ 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0]
+.delay_slot
+.swstall delay_slot
+ 4456 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4458 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4460 0x00 0x00 NOPX
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+
+.text_segment PM 4464
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function_start
+ 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128
+ 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3
+ 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28
+ 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216
+ 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431
+ 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20
+ 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60
+ 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64
+ 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28]
+ 4562 0x00 0x00 NOPX
+ 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18
+ 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28
+ 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17
+ 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6
+ 4584 0x00 0x00 NOPX
+ 4586 0x00 0x00 NOPX
+ 4588 0x00 0x00 NOPX
+ 4590 0x00 0x00 NOPX
+ 4592 0x00 0x00 NOPX
+ 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30
+ 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27
+ 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28
+ 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30
+ 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17
+ 4650 0x00 0x00 NOPX
+ 4652 0x00 0x00 NOPX
+ 4654 0x00 0x00 NOPX
+ 4656 0x00 0x00 NOPX
+ 4658 0x00 0x00 NOPX
+ 4660 0x00 0x00 NOPX
+ 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4
+ 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736
+.delay_slot
+ 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20
+.delay_slot
+ 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31
+.delay_slot
+ 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22
+.delay_slot
+ 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23
+.delay_slot
+ 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22
+ 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736
+.delay_slot
+ 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24]
+.delay_slot
+.swstall delay_slot
+ 4704 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4706 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4708 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4710 0x00 0x00 NOPX
+ 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784
+.delay_slot
+ 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25
+.delay_slot
+ 4726 0x10 0x26 0x05 0x18 MOVX r19, #1
+.delay_slot
+.swstall delay_slot
+ 4730 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4732 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4734 0x00 0x00 NOPX
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25
+ 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21
+ 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27
+ 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30
+ 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16
+ 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27
+ 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20
+ 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20
+ 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+ 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832
+.delay_slot
+ 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4
+.delay_slot
+.swstall delay_slot
+ 4800 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4802 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4804 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 4806 0x00 0x00 NOPX
+ 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215
+ 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196
+ 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4
+ 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4
+ 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1]
+ 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18
+ 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4]
+ 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18
+ 4902 0x00 0x00 NOPX
+ 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19
+ 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17
+ 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024
+.delay_slot
+ 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22
+.delay_slot
+ 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22
+.delay_slot
+ 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20
+.delay_slot
+ 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0
+.delay_slot
+ 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30
+ 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032
+ 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1
+ 4958 0x00 0x00 NOPX
+ 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144
+ 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20
+ 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0
+ 4998 0x00 0x00 NOPX
+ 5000 0x00 0x00 NOPX
+ 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7]
+ 5006 0x00 0x00 NOPX
+ 5008 0x00 0x00 NOPX
+ 5010 0x00 0x00 NOPX
+ 5012 0x00 0x00 NOPX
+ 5014 0x00 0x00 NOPX
+ 5016 0x00 0x00 NOPX
+ 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48]
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3]
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18
+.delay_slot
+ 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30
+.delay_slot
+ 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17
+.delay_slot
+ 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781
+.delay_slot
+ 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV
+.return_address
+ 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44]
+.no_stack_arguments
+ 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192
+.delay_slot
+.swstall delay_slot
+ 5114 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5116 0x00 0x00 NOPX
+.delay_slot
+ 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32
+.delay_slot
+ 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13
+.delay_slot
+ 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX
+.return_address
+ 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116
+ 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12
+ 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4
+ 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4
+ 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4
+ 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4
+ 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4
+ 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4
+ 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4
+ 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4
+ 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4
+ 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4
+ 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4
+ 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4
+ 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4
+ 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4
+ 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4
+ 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4
+ 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4
+ 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4
+ 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4
+ 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6]
+ 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48]
+ 5236 0x04 0x04 0x56 0x98 LDA r2, [p4]
+ 5240 0x00 0x00 NOPX
+ 5242 0x00 0x00 NOPX
+ 5244 0x00 0x00 NOPX
+ 5246 0x00 0x00 NOPX
+ 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18
+ 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176
+.delay_slot
+ 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.delay_slot
+ 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2]
+.delay_slot
+ 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20
+.delay_slot
+.swstall delay_slot
+ 5272 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 5274 0x00 0x00 NOPX
+ 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132
+ 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60
+ 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19
+ 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780
+ 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0
+ 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4
+ 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5
+ 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8
+ 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4
+ 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4
+ 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28
+ 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4
+ 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0
+ 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4]
+ 5400 0x04 0x04 0x96 0x98 LDA r4, [p4]
+ 5404 0x19 0xda 0x00 0xf8 MOV r7, m5
+ 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6
+ 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5
+ 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6
+ 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64
+ 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64
+ 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.loop_nesting 1
+ 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14
+ 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12
+ 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6
+ 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2
+ 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0
+ 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760
+ 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712
+ 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24]
+ 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+ 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.loop_nesting 1
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5
+ 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20
+ 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20
+ 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20
+ 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20
+ 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5
+ 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1
+ 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29
+ 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30
+ 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0
+ 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3
+ 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096
+.delay_slot
+ 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1
+.delay_slot
+ 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7
+.delay_slot
+ 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5
+.delay_slot
+ 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2
+.delay_slot
+ 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5
+ 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9
+ 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9
+ 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128
+.delay_slot
+ 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9
+.delay_slot
+ 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9
+.delay_slot
+ 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9
+.delay_slot
+ 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+ 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7]
+ 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64]
+ 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1]
+ 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64]
+ 6112 0x08 0x06 0x13 0x18 VST x8, [p0]
+ 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64]
+ 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+ 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0
+.delay_slot
+ 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2
+.delay_slot
+ 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19
+.delay_slot
+ 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7
+.delay_slot
+ 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7
+.delay_slot
+.swstall delay_slot
+ 6158 0x00 0x00 NOPX
+.loop_nesting 0
+ 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832
+.delay_slot
+.swstall delay_slot
+ 6166 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6168 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6170 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6172 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6174 0x00 0x00 NOPX
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+ 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14
+ 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2
+ 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23
+ 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30
+ 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29
+ 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21
+ 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320
+ 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14
+ 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4
+ 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4
+ 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28
+ 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4
+ 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6
+ 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4]
+ 6284 0x03 0x07 0x96 0x98 LDA r28, [p3]
+ 6288 0x00 0x00 NOPX
+ 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4
+ 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3
+ 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6
+ 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64
+ 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64
+ 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.loop_nesting 1
+ 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496
+ 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544
+ 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3
+ 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30
+ 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25]
+ 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0]
+ 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25]
+ 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]
+ 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25]
+ 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25]
+ 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25]
+ 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22
+ 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23
+ 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9
+ 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9
+ 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV
+.loop_nesting 1
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19
+ 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22
+ 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23
+ 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19
+ 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19
+ 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19
+ 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19
+ 6646 0x00 0x00 NOPX
+ 6648 0x00 0x00 NOPX
+ 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0
+ 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1
+ 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768
+.delay_slot
+ 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0
+.delay_slot
+ 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3
+.delay_slot
+ 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3
+.delay_slot
+ 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2
+.delay_slot
+ 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2
+ 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9
+ 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9
+ 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9
+ 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9
+ 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800
+.delay_slot
+ 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9
+.delay_slot
+ 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9
+.delay_slot
+ 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9
+.delay_slot
+ 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9
+.delay_slot
+ 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+ 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3]
+ 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64]
+ 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5]
+ 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64]
+ 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3]
+ 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64]
+ 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+ 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2
+.delay_slot
+ 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4
+.delay_slot
+.swstall delay_slot
+ 6810 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6812 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6814 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20]
+ 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16]
+ 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12]
+ 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24]
+ 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+ 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4]
+ 6856 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 6866 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6868 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6870 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 6872 0x00 0x00 NOPX
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+
+.text_segment PM 6880
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992
+ 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15
+ 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16]
+ 6910 0x00 0x00 NOPX
+ 6912 0x00 0x00 NOPX
+ 6914 0x00 0x00 NOPX
+ 6916 0x00 0x00 NOPX
+ 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088
+.delay_slot
+ 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4]
+.delay_slot
+ 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20]
+.delay_slot
+ 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 6940 0x00 0x07 0xc7 0xac 0x00 0x44 MOVXM r15, #509440
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 6984 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0
+.delay_slot
+ 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2
+.return_address
+ 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11
+ 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996
+ 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024
+ 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 7036 0x00 0x00 NOPX
+ 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2]
+ 7042 0x00 0x00 NOPX
+ 7044 0x00 0x00 NOPX
+ 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6
+ 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16
+ 7058 0x00 0x00 NOPX
+ 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 7064 0x00 0x00 NOPX
+ 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16
+ 7070 0x00 0x00 NOPX
+ 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000
+ 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004
+ 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992
+ 7118 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 7122 0x00 0x00 NOPX
+ 7124 0x00 0x00 NOPX
+ 7126 0x00 0x00 NOPX
+ 7128 0x00 0x00 NOPX
+ 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216
+.delay_slot
+ 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1
+.delay_slot
+ 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1
+.delay_slot
+ 7150 0x0e 0x06 0x71 0x98 ST r19, [p6]
+.delay_slot
+ 7154 0x0f 0x06 0x31 0x98 ST r17, [p7]
+ 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1
+ 7200 0x00 0x00 NOPX
+ 7202 0x00 0x00 NOPX
+ 7204 0x00 0x00 NOPX
+ 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.no_stack_arguments
+ 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464
+.delay_slot
+ 7222 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440
+.delay_slot
+.swstall delay_slot
+ 7228 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7232 0x00 0x00 NOPX
+.delay_slot
+ 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV
+.return_address
+ 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996
+ 7258 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16]
+ 7266 0x00 0x00 NOPX
+ 7268 0x00 0x00 NOPX
+ 7270 0x00 0x00 NOPX
+ 7272 0x00 0x00 NOPX
+ 7274 0x00 0x00 NOPX
+ 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360
+.delay_slot
+ 7286 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 7290 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7292 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7294 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7296 0x00 0x00 NOPX
+ 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20
+ 7304 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 7308 0x00 0x00 NOPX
+ 7310 0x00 0x00 NOPX
+ 7312 0x00 0x00 NOPX
+ 7314 0x00 0x00 NOPX
+ 7316 0x00 0x00 NOPX
+ 7318 0x00 0x00 NOPX
+ 7320 0x14 0x51 0x08 0x18 REL r17, r16
+ 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6]
+ 7330 0x00 0x00 NOPX
+ 7332 0x00 0x00 NOPX
+ 7334 0x00 0x00 NOPX
+ 7336 0x00 0x00 NOPX
+ 7338 0x00 0x00 NOPX
+ 7340 0x00 0x00 NOPX
+ 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17
+ 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+ 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024
+ 7370 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8]
+ 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 7382 0x00 0x00 NOPX
+ 7384 0x00 0x00 NOPX
+ 7386 0x00 0x00 NOPX
+ 7388 0x00 0x00 NOPX
+ 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16
+ 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424
+.delay_slot
+.swstall delay_slot
+ 7400 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7406 0x00 0x00 NOPX
+.delay_slot
+ 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0
+ 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4]
+ 7428 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7442 0x00 0x00 NOPX
+.delay_slot
+ 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 7456
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 7470 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 7478 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7488
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+ 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4]
+ 7506 0x00 0x00 NOPX
+ 7508 0x00 0x00 NOPX
+ 7510 0x00 0x00 NOPX
+ 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 7520 0x00 0x00 NOPX
+ 7522 0x00 0x00 NOPX
+ 7524 0x00 0x00 NOPX
+ 7526 0x00 0x00 NOPX
+ 7528 0x00 0x00 NOPX
+ 7530 0x00 0x00 NOPX
+ 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 7540 0x00 0x00 NOPX
+ 7542 0x00 0x00 NOPX
+ 7544 0x00 0x00 NOPX
+ 7546 0x00 0x00 NOPX
+ 7548 0x00 0x00 NOPX
+ 7550 0x00 0x00 NOPX
+ 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 7560 0x00 0x00 NOPX
+ 7562 0x00 0x00 NOPX
+.no_stack_arguments
+ 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456
+.delay_slot
+.swstall delay_slot
+ 7570 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7572 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7574 0x00 0x00 NOPX
+.delay_slot
+ 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0
+.return_address
+ 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16
+ 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3
+ 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128
+ 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0]
+ 7618 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 0x10 0x22 0x05 0x18 MOVX r17, #1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27
+.delay_slot
+.swstall delay_slot
+ 7648 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7664
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function_start
+ 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0
+ 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12
+ 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0
+ 7684 0x00 0x00 NOPX
+ 7686 0x00 0x00 NOPX
+ 7688 0x00 0x00 NOPX
+ 7690 0x00 0x00 NOPX
+ 7692 0x00 0x00 NOPX
+ 7694 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0
+.delay_slot
+ 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1
+.delay_slot
+ 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2
+.delay_slot
+ 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1
+.delay_slot
+ 7716 0x08 0x04 0x51 0x98 ST r2, [p0]
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 7728
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+.no_stack_arguments
+ 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488
+.delay_slot
+ 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+ 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 7752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7
+.delay_slot
+ 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 7784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7786 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7788 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 7792
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20
+ 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3
+ 7804 0x00 0x00 NOPX
+ 7806 0x00 0x00 NOPX
+ 7808 0x00 0x00 NOPX
+ 7810 0x00 0x00 NOPX
+ 7812 0x00 0x00 NOPX
+ 7814 0x00 0x00 NOPX
+ 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872
+.delay_slot
+ 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6
+.delay_slot
+ 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2
+.delay_slot
+.swstall delay_slot
+ 7830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7834 0x00 0x00 NOPX
+ 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0]
+ 7840 0x00 0x00 NOPX
+ 7842 0x00 0x00 NOPX
+ 7844 0x00 0x00 NOPX
+ 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904
+.delay_slot
+.swstall delay_slot
+ 7852 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 7854 0x00 0x00 NOPX
+.delay_slot
+ 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+.swstall delay_slot
+ 7860 0x00 0x00 NOPX
+.delay_slot
+ 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+ 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1]
+ 7876 0x00 0x00 NOPX
+ 7878 0x00 0x00 NOPX
+ 7880 0x00 0x00 NOPX
+ 7882 0x00 0x00 NOPX
+ 7884 0x00 0x00 NOPX
+ 7886 0x00 0x00 NOPX
+ 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+ 7892 0x00 0x00 NOPX
+ 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+ 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3
+ 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016
+ 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048
+ 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+ 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4]
+ 7940 0x00 0x00 NOPX
+ 7942 0x00 0x00 NOPX
+ 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+ 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1
+ 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8064 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr
+.delay_slot
+.swstall delay_slot
+ 8092 0x00 0x00 NOPX
+.delay_slot
+ 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8098 0x00 0x00 NOPX
+.delay_slot
+ 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 8104 0x00 0x00 NOPX
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8112
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function_start
+ 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2
+ 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12
+ 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0
+ 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp
+ 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128
+ 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0]
+ 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0]
+ 8158 0x00 0x00 NOPX
+ 8160 0x00 0x00 NOPX
+.no_stack_arguments
+ 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792
+.delay_slot
+ 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0
+.delay_slot
+.swstall delay_slot
+ 8172 0x00 0x00 NOPX
+.delay_slot
+ 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27
+.delay_slot
+ 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18
+.delay_slot
+ 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16
+.return_address
+ 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 8196 0x00 0x00 NOPX
+ 8198 0x00 0x00 NOPX
+ 8200 0x00 0x00 NOPX
+ 8202 0x00 0x00 NOPX
+ 8204 0x00 0x00 NOPX
+ 8206 0x00 0x00 NOPX
+ 8208 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 8218 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8220 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8222 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8224 0x00 0x00 NOPX
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+
+.text_segment PM 8240
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 8280 0x00 0x00 NOPX
+ 8282 0x00 0x00 NOPX
+ 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448
+.delay_slot
+ 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 8348 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120
+ 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120
+ 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 8404 0x00 0x00 NOPX
+ 8406 0x00 0x00 NOPX
+ 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464
+.delay_slot
+ 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 8420 0x00 0x00 NOPX
+.delay_slot
+ 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 8494 0x00 0x00 NOPX
+ 8496 0x00 0x00 NOPX
+ 8498 0x00 0x00 NOPX
+ 8500 0x00 0x00 NOPX
+ 8502 0x00 0x00 NOPX
+ 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 8508 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 8516 0x00 0x00 NOPX
+ 8518 0x00 0x00 NOPX
+ 8520 0x00 0x00 NOPX
+ 8522 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 8532 0x00 0x00 NOPX
+ 8534 0x00 0x00 NOPX
+ 8536 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 8546 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 8554 0x00 0x00 NOPX
+.no_stack_arguments
+ 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112
+.delay_slot
+.swstall delay_slot
+ 8562 0x00 0x00 NOPX
+.delay_slot
+ 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 8568 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 8602 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 8606 0x00 0x00 NOPX
+ 8608 0x00 0x00 NOPX
+ 8610 0x00 0x00 NOPX
+ 8612 0x00 0x00 NOPX
+ 8614 0x00 0x00 NOPX
+ 8616 0x14 0x51 0x08 0x18 REL r17, r16
+ 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 8630 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 8634 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 8638 0x00 0x00 NOPX
+ 8640 0x00 0x00 NOPX
+ 8642 0x00 0x00 NOPX
+ 8644 0x00 0x00 NOPX
+ 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688
+.delay_slot
+.swstall delay_slot
+ 8664 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8666 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8668 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8670 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8672 0x00 0x00 NOPX
+ 8674 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 8696 0x00 0x00 NOPX
+ 8698 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 8722 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8724 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8726 0x00 0x00 NOPX
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 8736
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function_start
+ 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8740 0x00 0x00 NOPX
+ 8742 0x00 0x00 NOPX
+ 8744 0x00 0x00 NOPX
+ 8746 0x00 0x00 NOPX
+ 8748 0x00 0x00 NOPX
+ 8750 0x00 0x00 NOPX
+ 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 8760 0x00 0x00 NOPX
+ 8762 0x00 0x00 NOPX
+ 8764 0x00 0x00 NOPX
+ 8766 0x00 0x00 NOPX
+ 8768 0x00 0x00 NOPX
+ 8770 0x00 0x00 NOPX
+ 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24
+ 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1]
+ 8784 0x00 0x00 NOPX
+ 8786 0x00 0x00 NOPX
+ 8788 0x00 0x00 NOPX
+ 8790 0x00 0x00 NOPX
+ 8792 0x00 0x00 NOPX
+ 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24
+ 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+ 8802 0x00 0x00 NOPX
+ 8804 0x00 0x00 NOPX
+ 8806 0x00 0x00 NOPX
+ 8808 0x00 0x00 NOPX
+ 8810 0x00 0x00 NOPX
+ 8812 0x00 0x00 NOPX
+ 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4]
+ 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2]
+ 8822 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+ 8826 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8828 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8830 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8832 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 8834 0x00 0x00 NOPX
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 8848
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function_start
+ 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976
+ 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024
+ 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2
+ 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32
+ 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0]
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+ 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1
+ 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0
+.delay_slot
+ 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1
+.delay_slot
+ 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0
+.delay_slot
+ 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1
+.delay_slot
+ 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64
+.delay_slot
+ 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+
+.text_segment PM 9104
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 9144 0x00 0x00 NOPX
+ 9146 0x00 0x00 NOPX
+ 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312
+.delay_slot
+ 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 9174 0x00 0x07 0xc0 0xcb 0x80 0x44 MOVXM p0, #509376
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 9212 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509376
+ 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509376
+ 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012
+ 9268 0x00 0x00 NOPX
+ 9270 0x00 0x00 NOPX
+ 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328
+.delay_slot
+ 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 9284 0x00 0x00 NOPX
+.delay_slot
+ 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 9358 0x00 0x00 NOPX
+ 9360 0x00 0x00 NOPX
+ 9362 0x00 0x00 NOPX
+ 9364 0x00 0x00 NOPX
+ 9366 0x00 0x00 NOPX
+ 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 9372 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 9380 0x00 0x00 NOPX
+ 9382 0x00 0x00 NOPX
+ 9384 0x00 0x00 NOPX
+ 9386 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 9396 0x00 0x00 NOPX
+ 9398 0x00 0x00 NOPX
+ 9400 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 9410 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 9418 0x00 0x00 NOPX
+.no_stack_arguments
+ 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848
+.delay_slot
+.swstall delay_slot
+ 9426 0x00 0x00 NOPX
+.delay_slot
+ 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 9432 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 9466 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 9470 0x00 0x00 NOPX
+ 9472 0x00 0x00 NOPX
+ 9474 0x00 0x00 NOPX
+ 9476 0x00 0x00 NOPX
+ 9478 0x00 0x00 NOPX
+ 9480 0x14 0x51 0x08 0x18 REL r17, r16
+ 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 9494 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 9498 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 9502 0x00 0x00 NOPX
+ 9504 0x00 0x00 NOPX
+ 9506 0x00 0x00 NOPX
+ 9508 0x00 0x00 NOPX
+ 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552
+.delay_slot
+.swstall delay_slot
+ 9528 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9530 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9532 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9534 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9536 0x00 0x00 NOPX
+ 9538 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 9560 0x00 0x00 NOPX
+ 9562 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 9586 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9588 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9590 0x00 0x00 NOPX
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 9600
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0
+ 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128
+ 9614 0x00 0x00 NOPX
+ 9616 0x00 0x00 NOPX
+ 9618 0x00 0x00 NOPX
+ 9620 0x00 0x00 NOPX
+ 9622 0x00 0x00 NOPX
+ 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 9632 0x00 0x00 NOPX
+ 9634 0x00 0x00 NOPX
+ 9636 0x00 0x00 NOPX
+ 9638 0x00 0x00 NOPX
+ 9640 0x00 0x00 NOPX
+ 9642 0x00 0x00 NOPX
+ 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 9652 0x00 0x00 NOPX
+ 9654 0x00 0x00 NOPX
+ 9656 0x00 0x00 NOPX
+ 9658 0x00 0x00 NOPX
+ 9660 0x00 0x00 NOPX
+ 9662 0x00 0x00 NOPX
+ 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4]
+ 9672 0x00 0x00 NOPX
+ 9674 0x00 0x00 NOPX
+ 9676 0x00 0x00 NOPX
+ 9678 0x00 0x00 NOPX
+ 9680 0x00 0x00 NOPX
+ 9682 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27
+.delay_slot
+.swstall delay_slot
+ 9714 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9728
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+.no_stack_arguments
+ 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600
+.delay_slot
+ 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4]
+.delay_slot
+ 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9752 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9754 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9756 0x00 0x01 0x67 0x98 NOPA
+.return_address
+ 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8]
+ 9764 0x00 0x00 NOPX
+ 9766 0x00 0x00 NOPX
+ 9768 0x00 0x00 NOPX
+ 9770 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 0x10 0x20 0x09 0x18 MOVX r16, #2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 9808
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function_start
+ 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3
+ 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14
+ 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2
+ 9820 0x03 0x04 0x96 0x98 LDA r4, [p3]
+ 9824 0x00 0x00 NOPX
+ 9826 0x00 0x00 NOPX
+ 9828 0x00 0x00 NOPX
+ 9830 0x00 0x00 NOPX
+ 9832 0x10 0x06 0x09 0x18 MOVX r3, #2
+ 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4
+ 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000
+.delay_slot
+ 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1
+.delay_slot
+ 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7
+.delay_slot
+ 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp
+.delay_slot
+ 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7
+.delay_slot
+ 9874 0x0f 0x04 0x13 0x18 VST x0, [p7]
+ 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2
+ 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64
+ 9894 0x00 0x00 NOPX
+ 9896 0x00 0x00 NOPX
+ 9898 0x00 0x00 NOPX
+ 9900 0x00 0x00 NOPX
+ 9902 0x00 0x00 NOPX
+ 9904 0x00 0x00 NOPX
+ 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952
+.delay_slot
+ 9912 0x18 0x00 0x00 0xb8 MOV m0, #0
+.delay_slot
+ 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 9922 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9926 0x00 0x00 NOPX
+ 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968
+.delay_slot
+.swstall delay_slot
+ 9938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9944 0x00 0x00 NOPX
+.delay_slot
+ 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+ 9952 0x19 0x00 0x80 0xb8 MOV m1, #64
+ 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128
+.delay_slot
+ 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0
+.delay_slot
+.swstall delay_slot
+ 9982 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9984 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9986 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+ 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3
+ 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4
+ 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048
+.delay_slot
+ 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216
+.delay_slot
+ 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032
+.delay_slot
+.swstall delay_slot
+ 10026 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10028 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10030 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 0x18 0x00 0x80 0xb8 MOV m0, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 0x19 0x00 0x00 0xb8 MOV m1, #0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0
+ 10096 0x00 0x00 NOPX
+ 10098 0x00 0x00 NOPX
+ 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0]
+ 10104 0x00 0x00 NOPX
+ 10106 0x00 0x00 NOPX
+ 10108 0x00 0x00 NOPX
+ 10110 0x00 0x00 NOPX
+ 10112 0x00 0x00 NOPX
+ 10114 0x00 0x00 NOPX
+ 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0
+ 10120 0x00 0x00 NOPX
+ 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64]
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+ 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1
+.delay_slot
+.swstall delay_slot
+ 10334 0x00 0x00 NOPX
+.delay_slot
+ 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 10340 0x00 0x00 NOPX
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+
+.text_segment PM 10352
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function_start
+ 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr
+ 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1
+ 10368 0x00 0x00 NOPX
+ 10370 0x00 0x00 NOPX
+ 10372 0x00 0x00 NOPX
+ 10374 0x00 0x00 NOPX
+ 10376 0x00 0x00 NOPX
+ 10378 0x00 0x00 NOPX
+ 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448
+.delay_slot
+ 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+.delay_slot
+ 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp
+.delay_slot
+ 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64
+.delay_slot
+ 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1]
+.delay_slot
+.swstall delay_slot
+ 10404 0x00 0x00 NOPX
+.no_stack_arguments
+ 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+.swstall delay_slot
+ 10412 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10416 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10418 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.return_address
+ 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480
+.delay_slot
+.swstall delay_slot
+ 10438 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10440 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10442 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10444 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10446 0x00 0x00 NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.no_stack_arguments
+ 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808
+.delay_slot
+ 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0
+.delay_slot
+.swstall delay_slot
+ 10462 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10464 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10466 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.return_address
+ 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0
+ 10484 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10494 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10496 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10498 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10500 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 10512
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992
+ 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID
+ 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15
+ 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008
+ 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+ 10552 0x00 0x00 NOPX
+ 10554 0x00 0x00 NOPX
+ 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720
+.delay_slot
+ 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12]
+.delay_slot
+ 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0
+.delay_slot
+ 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 10620 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV
+.return_address
+ 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+ 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184
+ 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184
+ 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012
+ 10676 0x00 0x00 NOPX
+ 10678 0x00 0x00 NOPX
+ 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736
+.delay_slot
+ 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024
+.delay_slot
+.swstall delay_slot
+ 10692 0x00 0x00 NOPX
+.delay_slot
+ 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0]
+.delay_slot
+ 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+ 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+ 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12
+ 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992
+ 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4
+ 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4
+ 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16]
+ 10766 0x00 0x00 NOPX
+ 10768 0x00 0x00 NOPX
+ 10770 0x00 0x00 NOPX
+ 10772 0x00 0x00 NOPX
+ 10774 0x00 0x00 NOPX
+ 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 10780 0x0f 0x06 0x11 0x98 ST r16, [p7]
+ 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 10788 0x00 0x00 NOPX
+ 10790 0x00 0x00 NOPX
+ 10792 0x00 0x00 NOPX
+ 10794 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3
+ 10804 0x00 0x00 NOPX
+ 10806 0x00 0x00 NOPX
+ 10808 0x00 0x06 0x36 0x98 LDA r17, [p0]
+ 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7
+ 10818 0x01 0x06 0x76 0x98 LDA r19, [p1]
+ 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20
+ 10826 0x00 0x00 NOPX
+.no_stack_arguments
+ 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352
+.delay_slot
+.swstall delay_slot
+ 10834 0x00 0x00 NOPX
+.delay_slot
+ 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1
+.delay_slot
+ 10840 0x08 0x06 0x31 0x98 ST r17, [p0]
+.delay_slot
+ 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16
+.delay_slot
+ 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV
+.return_address
+ 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992
+ 10874 0x10 0x20 0x05 0x18 MOVX r16, #1
+ 10878 0x00 0x00 NOPX
+ 10880 0x00 0x00 NOPX
+ 10882 0x00 0x00 NOPX
+ 10884 0x00 0x00 NOPX
+ 10886 0x00 0x00 NOPX
+ 10888 0x14 0x51 0x08 0x18 REL r17, r16
+ 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024
+ 10902 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 10906 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 10910 0x00 0x00 NOPX
+ 10912 0x00 0x00 NOPX
+ 10914 0x00 0x00 NOPX
+ 10916 0x00 0x00 NOPX
+ 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960
+.delay_slot
+.swstall delay_slot
+ 10936 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10938 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10940 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10942 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10944 0x00 0x00 NOPX
+ 10946 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12]
+ 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8]
+ 10968 0x00 0x00 NOPX
+ 10970 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15
+.delay_slot
+ 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 10994 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10996 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 10998 0x00 0x00 NOPX
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 11008
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 11008 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0
+ 11018 0x17 0x80 0x01 0x18 MOVX r0, #-128
+ 11022 0x00 0x00 NOPX
+ 11024 0x00 0x00 NOPX
+ 11026 0x00 0x00 NOPX
+ 11028 0x00 0x00 NOPX
+ 11030 0x00 0x00 NOPX
+ 11032 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11036 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11040 0x00 0x00 NOPX
+ 11042 0x00 0x00 NOPX
+ 11044 0x00 0x00 NOPX
+ 11046 0x00 0x00 NOPX
+ 11048 0x00 0x00 NOPX
+ 11050 0x00 0x00 NOPX
+ 11052 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11056 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 11060 0x00 0x00 NOPX
+ 11062 0x00 0x00 NOPX
+ 11064 0x00 0x00 NOPX
+ 11066 0x00 0x00 NOPX
+ 11068 0x00 0x00 NOPX
+ 11070 0x00 0x00 NOPX
+ 11072 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11076 0x01 0x14 0x76 0x98 LDA r3, [p1, #4]
+ 11080 0x00 0x00 NOPX
+ 11082 0x00 0x00 NOPX
+ 11084 0x00 0x00 NOPX
+ 11086 0x00 0x00 NOPX
+ 11088 0x00 0x00 NOPX
+ 11090 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 0x08 0x4c 0x71 0x98 ST r3, [p0], #16
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 0x00 0x04 0x17 0x18 ST.s16 r0, [p0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 0x10 0xc2 0x14 0x98 AND r1, r3, r1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 0x10 0x76 0x27 0x98 EQ r27, r1, r2
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27
+.delay_slot
+.swstall delay_slot
+ 11122 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 11136
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function_start
+ 11136 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11142 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8]
+.no_stack_arguments
+ 11146 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008
+.delay_slot
+ 11152 0x18 0x17 0xa0 0xf8 MOV r0, r15
+.delay_slot
+ 11156 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4]
+.delay_slot
+ 11160 0x1b 0xd0 0xc0 0xf8 MOV r15, p0
+.delay_slot
+.swstall delay_slot
+ 11164 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11166 0x00 0x00 NOPX
+.return_address
+ 11168 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16
+ 11178 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3
+ 11188 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128
+ 11198 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0]
+ 11202 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 0x10 0x22 0x05 0x18 MOVX r17, #1
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 0x14 0x77 0x27 0x98 EQ r27, r17, r18
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27
+.delay_slot
+.swstall delay_slot
+ 11232 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+
+.text_segment PM 11248
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.tail_call
+.function_start
+ 11248 0x00 0x13 0x28 0x00 0x00 0x84 J #9808
+.delay_slot
+.swstall delay_slot
+ 11254 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11256 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11258 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11260 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11262 0x00 0x00 NOPX
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function_start
+ 11264 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64
+.delay_slot
+ 11270 0x18 0x50 0xc0 0xf8 MOV r1, p0
+.delay_slot
+ 11274 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32
+.delay_slot
+ 11278 0x08 0x04 0x11 0x98 ST r0, [p0]
+.delay_slot
+ 11282 0x08 0x14 0x11 0x98 ST r0, [p0, #4]
+.delay_slot
+.swstall delay_slot
+ 11286 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11296
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function_start
+ 11296 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11300 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11306 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 11310 0x00 0x00 NOPX
+ 11312 0x00 0x00 NOPX
+ 11314 0x00 0x00 NOPX
+ 11316 0x00 0x00 NOPX
+ 11318 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11322 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4
+ 11326 0x00 0x00 NOPX
+ 11328 0x00 0x00 NOPX
+ 11330 0x00 0x00 NOPX
+ 11332 0x00 0x00 NOPX
+ 11334 0x00 0x00 NOPX
+ 11336 0x00 0x00 NOPX
+ 11338 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11342 0x01 0x04 0x2e 0x98 LDA el0, [p1]
+ 11346 0x00 0x00 NOPX
+ 11348 0x00 0x00 NOPX
+ 11350 0x00 0x00 NOPX
+ 11352 0x00 0x00 NOPX
+ 11354 0x00 0x00 NOPX
+ 11356 0x00 0x00 NOPX
+ 11358 0x08 0x1c 0x29 0x98 ST el0, [p0], #4
+ 11362 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4]
+ 11366 0x00 0x00 NOPX
+ 11368 0x00 0x00 NOPX
+.no_stack_arguments
+ 11370 0x00 0x16 0x00 0x00 0x01 0x04 JL #11264
+.delay_slot
+ 11376 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 11380 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 11382 0x00 0x00 NOPX
+.delay_slot
+ 11384 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12
+.delay_slot
+ 11388 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.return_address
+ 11392 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4]
+ 11396 0x00 0x00 NOPX
+ 11398 0x00 0x00 NOPX
+ 11400 0x00 0x00 NOPX
+ 11402 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+
+.text_segment PM 11440
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function_start
+ 11440 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0xb0 0x10 0xba MOVA m0, #32; MOVXM ls, #11616
+ 11450 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0xb8 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11632
+ 11460 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828
+ 11470 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032
+ 11480 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4]
+ 11484 0x00 0x00 NOPX
+ 11486 0x00 0x00 NOPX
+ 11488 0x00 0x00 NOPX
+ 11490 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1
+ 11494 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11616 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0
+.loop_nesting 0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11648 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+ 11722 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64
+.delay_slot
+ 11726 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64
+.delay_slot
+.swstall delay_slot
+ 11730 0x00 0x00 NOPX
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+
+.text_segment PM 11744
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function_start
+ 11744 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 11750 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID
+ 11756 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 11762 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2
+ 11772 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15
+ 11780 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4]
+ 11784 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16]
+ 11788 0x00 0x00 NOPX
+ 11790 0x80 0x17 0x50 0x40 0x01 0x84 JNZ r16, #11936
+.delay_slot
+ 11796 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 11800 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008
+.delay_slot
+ 11806 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7
+.delay_slot
+ 11814 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 11818 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509312
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 0x00 0x16 0x10 0x00 0x01 0x04 JL #11296
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 11864 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 11868 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.return_address
+ 11872 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008
+ 11882 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012
+ 11892 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020
+ 11902 0x00 0x00 NOPX
+ 11904 0x00 0x00 NOPX
+ 11906 0x00 0x00 NOPX
+ 11908 0x00 0x17 0x58 0x00 0x00 0x84 J #11952
+.delay_slot
+ 11914 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024
+.delay_slot
+ 11920 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+.delay_slot
+ 11924 0x0a 0x06 0x51 0x98 ST r18, [p2]
+.delay_slot
+ 11928 0x0b 0x06 0x11 0x98 ST r16, [p3]
+.delay_slot
+ 11932 0x09 0x06 0x11 0x98 ST r16, [p1]
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+ 11936 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012
+ 11942 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+ 11952 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12
+ 11956 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992
+ 11966 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4
+ 11970 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4
+ 11974 0x02 0x06 0x56 0x98 LDA r18, [p2]
+ 11978 0x00 0x46 0x76 0x98 LDA r19, [p0, #16]
+ 11982 0x00 0x00 NOPX
+ 11984 0x00 0x00 NOPX
+ 11986 0x00 0x00 NOPX
+ 11988 0x00 0x00 NOPX
+ 11990 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 11994 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1
+ 12000 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 12004 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 12008 0x00 0x00 NOPX
+ 12010 0x00 0x00 NOPX
+ 12012 0x00 0x00 NOPX
+ 12014 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 12018 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12
+ 12022 0x00 0x00 NOPX
+ 12024 0x00 0x00 NOPX
+ 12026 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4
+ 12030 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 12034 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4
+ 12038 0x02 0x56 0x76 0x98 LDA r19, [p2, #20]
+ 12042 0x00 0x00 NOPX
+ 12044 0x00 0x00 NOPX
+ 12046 0x00 0x00 NOPX
+ 12048 0x00 0x00 NOPX
+ 12050 0x00 0x00 NOPX
+ 12052 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+ 12056 0x0a 0x06 0x31 0x98 ST r17, [p2]
+ 12060 0x00 0x00 NOPX
+ 12062 0x00 0x00 NOPX
+ 12064 0x00 0x00 NOPX
+ 12066 0x00 0x00 NOPX
+ 12068 0x14 0xd3 0x08 0x18 ACQ r19, r16
+ 12072 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6
+ 12082 0x00 0x00 NOPX
+ 12084 0x00 0x00 NOPX
+ 12086 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20]
+ 12090 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20]
+ 12096 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 0x04 0x06 0x76 0x98 LDA r19, [p4]
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 0x1b 0xd4 0xc0 0xf8 MOV r15, p2
+.delay_slot
+ 12132 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16
+.delay_slot
+ 12136 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16
+.delay_slot
+ 12140 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16
+.delay_slot
+ 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV
+.return_address
+ 12160 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15
+ 12170 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024
+ 12176 0x00 0x00 NOPX
+ 12178 0x00 0x00 NOPX
+ 12180 0x00 0x00 NOPX
+ 12182 0x00 0x00 NOPX
+ 12184 0x00 0x00 NOPX
+ 12186 0x14 0x51 0x08 0x18 REL r17, r16
+ 12190 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4]
+ 12194 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20]
+ 12198 0x00 0x00 NOPX
+ 12200 0x00 0x00 NOPX
+ 12202 0x00 0x00 NOPX
+ 12204 0x00 0x00 NOPX
+ 12206 0x00 0x00 NOPX
+ 12208 0x14 0x23 0x11 0x98 SUB r17, r16, r17
+ 12212 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4]
+ 12218 0x00 0x00 NOPX
+ 12220 0x00 0x00 NOPX
+ 12222 0x00 0x00 NOPX
+ 12224 0x00 0x00 NOPX
+ 12226 0x00 0x00 NOPX
+ 12228 0x00 0x00 NOPX
+ 12230 0x14 0x51 0x08 0x18 REL r17, r16
+ 12234 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992
+ 12244 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 12248 0x01 0x06 0x36 0x98 LDA r17, [p1]
+ 12252 0x00 0x00 NOPX
+ 12254 0x00 0x00 NOPX
+ 12256 0x00 0x00 NOPX
+ 12258 0x00 0x00 NOPX
+ 12260 0x14 0x21 0x31 0x98 SUB r16, r16, r19
+ 12264 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8]
+ 12268 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 12272 0x80 0x18 0x08 0x40 0x01 0x84 JNZ r16, #12304
+.delay_slot
+.swstall delay_slot
+ 12278 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12280 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12282 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12284 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12286 0x00 0x00 NOPX
+ 12288 0x10 0x20 0x01 0x18 MOVX r16, #0
+ 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+ 12304 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16]
+ 12308 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4]
+ 12312 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12]
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 0x0e 0x8e 0x0b 0x18 MOVS p6, r14
+.delay_slot
+ 12334 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 12340 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12342 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12344 0x00 0x00 NOPX
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+
+.text_segment PM 12352
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function_start
+ 12352 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509888
+ 12362 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0
+ 12372 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 12378 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xe0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509888
+ 12388 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4]
+ 12392 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8]
+ 12396 0x00 0x00 NOPX
+ 12398 0x09 0x1c 0x29 0x98 ST el0, [p1], #4
+ 12402 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4
+ 12406 0x00 0x04 0x2e 0x98 LDA el0, [p0]
+ 12410 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4]
+ 12414 0x00 0x00 NOPX
+ 12416 0x00 0x00 NOPX
+ 12418 0x00 0x00 NOPX
+ 12420 0x00 0x00 NOPX
+ 12422 0x00 0x00 NOPX
+ 12424 0x09 0x04 0x29 0x98 ST el0, [p1]
+ 12428 0x09 0x14 0x09 0x98 ST eh0, [p1, #4]
+ 12432 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5
+ 12436 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2
+ 12440 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2
+ 12444 0x00 0x00 NOPX
+ 12446 0x00 0x00 NOPX
+ 12448 0x00 0x00 NOPX
+ 12450 0x00 0x00 NOPX
+.no_stack_arguments
+ 12452 0x00 0x1e 0x98 0x00 0x01 0x04 JL #15664
+.delay_slot
+ 12458 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18
+.delay_slot
+ 12464 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16
+.delay_slot
+ 12470 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24
+.delay_slot
+ 12476 0x16 0x22 0xf1 0x98 SUB r17, r24, r15
+.delay_slot
+ 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV
+.return_address
+ 12496 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0
+ 12502 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2
+ 12508 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2
+ 12512 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20]
+ 12516 0x00 0x00 NOPX
+ 12518 0x00 0x00 NOPX
+ 12520 0x00 0x00 NOPX
+ 12522 0x13 0xe9 0x46 0x98 XOR r20, r15, r20
+ 12526 0x15 0x37 0x0a 0x98 LT r27, r20, r16
+ 12530 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19
+.no_stack_arguments
+ 12536 0xfc 0x46 0xb0 0x00 0x07 0xa6 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #15664
+.delay_slot
+ 12546 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27
+.delay_slot
+ 12550 0x14 0x77 0x0a 0x98 LT r27, r17, r16
+.delay_slot
+ 12554 0x14 0x25 0x11 0x98 SUB r18, r16, r17
+.delay_slot
+ 12558 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20
+.delay_slot
+ 12562 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1
+.return_address
+ 12576 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66
+ 12586 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508
+ 12596 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2
+ 12606 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23
+ 12616 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32
+ 12626 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1
+ 12636 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64
+ 12646 0x16 0x28 0x21 0x98 SUB r20, r24, r2
+ 12650 0x10 0xc7 0x06 0x98 XOR r3, r3, r16
+ 12654 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2
+ 12660 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3
+ 12666 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7
+ 12672 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20
+ 12676 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1
+ 12682 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1
+ 12688 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0
+ 12694 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1
+ 12700 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16
+ 12704 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17
+ 12708 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1
+ 12714 0x11 0x37 0x07 0x98 EQ r27, r4, r16
+ 12718 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6
+ 12724 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27
+ 12728 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18
+ 12732 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1
+ 12738 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23
+ 12744 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0
+ 12750 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23
+ 12756 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29
+ 12762 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26
+ 12768 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28
+ 12774 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27
+ 12780 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22
+ 12784 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1
+ 12790 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22
+ 12796 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1
+ 12802 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30
+ 12808 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26
+ 12814 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2
+ 12820 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22
+ 12826 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30
+ 12832 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29
+ 12838 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1
+ 12848 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20
+ 12854 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22
+ 12860 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64
+ 12872 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4
+ 12876 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23
+ 12882 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12888 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23
+ 12894 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20
+ 12900 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4
+ 12904 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4
+ 12908 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0
+ 12912 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7]
+ 12916 0x00 0x00 NOPX
+ 12918 0x00 0x00 NOPX
+ 12920 0x00 0x00 NOPX
+ 12922 0x00 0x00 NOPX
+ 12924 0x00 0x00 NOPX
+ 12926 0x00 0x00 NOPX
+ 12928 0x80 0x19 0x50 0x00 0x01 0x84 JZ r16, #12960
+.delay_slot
+ 12934 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn
+.delay_slot
+ 12938 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144
+.delay_slot
+.swstall delay_slot
+ 12944 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12946 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 12948 0x00 0x00 NOPX
+ 12950 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+ 12960 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032
+ 12970 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19
+ 12976 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186
+ 12982 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0
+ 12988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+ 12994 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0]
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 0x1f 0x60 0xc0 0xf8 MOV p7, p0
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0
+.delay_slot
+.swstall delay_slot
+ 13020 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13022 0x00 0x00 NOPX
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function_start
+ 13024 0x1c 0x56 0xc0 0xf8 MOV r17, p3
+ 13028 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106
+ 13034 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2
+ 13040 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7
+ 13044 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4
+ 13048 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12
+ 13052 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4
+ 13056 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8
+ 13060 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8
+ 13064 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4
+ 13068 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12
+ 13072 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4
+ 13076 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8
+ 13080 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8
+ 13084 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4
+ 13088 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12
+ 13092 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4
+ 13096 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8
+ 13100 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8
+ 13104 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4
+ 13108 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8
+ 13112 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032
+ 13122 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1
+ 13128 0x1b 0x0f 0x10 0xb8 MOV m3, #-120
+ 13132 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0
+ 13144 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128
+ 13158 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2
+ 13168 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1
+ 13178 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112
+ 13188 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16
+ 13194 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0xf8 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296
+ 13206 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xba 0x28 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392
+ 13218 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0
+ 13224 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0
+ 13228 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb2 0x48 0x10 0xba LDA r5, [p3]; MOVXM p3, #13456
+ 13238 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0
+ 13248 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV
+ 13264 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.loop_nesting 1
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13366 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0
+ 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13408 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.loop_nesting 1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13456 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 0x00 0x00 0x01 0xb7 0x54 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 0x80 0x85 0x70 0x00 0x01 0x8f 0x4f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.loop_nesting 2
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 13552 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13606 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17
+ 13614 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17
+ 13622 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0
+ 13628 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17
+.loop_nesting 1
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13648 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17
+.loop_nesting 0
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13696 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3
+ 13700 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17
+ 13704 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17
+ 13708 0x00 0x00 NOPX
+ 13710 0x00 0x00 NOPX
+ 13712 0x00 0x00 NOPX
+ 13714 0x00 0x00 NOPX
+ 13716 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3
+ 13720 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr
+.delay_slot
+ 13726 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0
+.delay_slot
+ 13730 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5
+.delay_slot
+ 13734 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0
+.delay_slot
+ 13738 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4
+.delay_slot
+ 13742 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+
+.text_segment PM 13760
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function_start
+ 13760 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992
+ 13766 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15
+ 13772 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128
+ 13778 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID
+ 13786 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr
+ 13794 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20]
+ 13798 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12]
+ 13802 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2
+ 13810 0x80 0x1b 0x38 0x40 0x01 0x84 JNZ r16, #13936
+.delay_slot
+ 13816 0x1b 0xd6 0xc0 0xf8 MOV r15, p3
+.delay_slot
+ 13820 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 13824 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 13828 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008
+.delay_slot
+ 13834 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 0x00 0x18 0x20 0x00 0x01 0x04 JL #12352
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 13876 0x10 0x20 0x05 0x18 MOVX r16, #1
+.delay_slot
+ 13880 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM
+.return_address
+ 13888 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8
+ 13896 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2]
+ 13900 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024
+ 13910 0x00 0x00 NOPX
+ 13912 0x00 0x1b 0x40 0x00 0x00 0x84 J #13952
+.delay_slot
+ 13918 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016
+.delay_slot
+.swstall delay_slot
+ 13924 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 13926 0x00 0x00 NOPX
+.delay_slot
+ 13928 0x0b 0x06 0x31 0x98 ST r17, [p3]
+.delay_slot
+ 13932 0x0a 0x06 0x11 0x98 ST r16, [p2]
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+ 13936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+ 13952 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12
+ 13956 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008
+ 13966 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4
+ 13970 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4
+ 13974 0x02 0x46 0x56 0x98 LDA r18, [p2, #16]
+ 13978 0x00 0x00 NOPX
+ 13980 0x00 0x00 NOPX
+ 13982 0x00 0x00 NOPX
+ 13984 0x00 0x00 NOPX
+ 13986 0x00 0x00 NOPX
+ 13988 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27
+ 13992 0x0a 0x06 0x11 0x98 ST r16, [p2]
+ 13996 0x17 0xe0 0xfd 0x18 MOVX r16, #-1
+ 14000 0x00 0x00 NOPX
+ 14002 0x00 0x00 NOPX
+ 14004 0x00 0x00 NOPX
+ 14006 0x14 0x93 0x08 0x18 ACQ r18, r16
+ 14010 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992
+ 14020 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp
+ 14026 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76
+ 14030 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2
+ 14036 0x04 0x06 0x36 0x98 LDA r17, [p4]
+ 14040 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509888
+ 14050 0x07 0x06 0x56 0x98 LDA r18, [p7]
+ 14054 0x00 0x00 NOPX
+ 14056 0x00 0x00 NOPX
+ 14058 0x00 0x00 NOPX
+ 14060 0x05 0x06 0x76 0x98 LDA r19, [p5]
+ 14064 0x00 0x00 NOPX
+ 14066 0x14 0x61 0x0f 0x98 MUL r16, r17, r16
+ 14070 0x14 0xa2 0x07 0x18 ADD r17, r18, #1
+ 14074 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15
+.no_stack_arguments
+ 14078 0x00 0x19 0x70 0x00 0x01 0x04 JL #13024
+.delay_slot
+ 14084 0x0f 0x06 0x31 0x98 ST r17, [p7]
+.delay_slot
+ 14088 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16
+.delay_slot
+ 14092 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76]
+.delay_slot
+ 14096 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72]
+.delay_slot
+ 14100 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX
+.return_address
+ 14112 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20
+ 14116 0x02 0x06 0x16 0x98 LDA r16, [p2]
+ 14120 0x00 0x00 NOPX
+ 14122 0x00 0x00 NOPX
+ 14124 0x00 0x00 NOPX
+ 14126 0x00 0x00 NOPX
+ 14128 0x00 0x00 NOPX
+ 14130 0x00 0x00 NOPX
+ 14132 0x14 0x10 0xf8 0x18 REL r16, r15
+ 14136 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024
+ 14146 0x01 0x06 0x56 0x98 LDA r18, [p1]
+ 14150 0x07 0x06 0x36 0x98 LDA r17, [p7]
+ 14154 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12]
+ 14158 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8]
+ 14162 0x00 0x00 NOPX
+ 14164 0x00 0x00 NOPX
+ 14166 0x13 0xe1 0x01 0x98 SUB r16, r15, r16
+ 14170 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8]
+ 14174 0x14 0x61 0x28 0x98 NE r16, r17, r18
+ 14178 0x80 0x1b 0xc0 0x40 0x01 0x84 JNZ r16, #14208
+.delay_slot
+ 14184 0x10 0x30 0x01 0x18 MOVX r24, #0
+.delay_slot
+.swstall delay_slot
+ 14188 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14190 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14192 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14194 0x00 0x00 NOPX
+ 14196 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+ 14208 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13
+ 14214 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16]
+ 14218 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 14222 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128
+.delay_slot
+.swstall delay_slot
+ 14228 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14230 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14232 0x00 0x00 NOPX
+.delay_slot
+ 14234 0x1f 0x62 0xc0 0xf8 MOV p7, p1
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+
+.text_segment PM 14240
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function_start
+ 14240 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64
+ 14246 0xff 0x73 0xb0 0x00 0x01 0xf3 0xb2 0x20 0x11 0x3a ST p7, [sp, #-8]; MOVXM p7, #508992
+ 14256 0xe0 0xc2 0xd7 0xff 0x1d 0x82 0x2d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID
+ 14266 0x0f 0xf6 0x1d 0x98 ST p4, [sp, #-12]
+ 14270 0x0f 0xf1 0x1d 0x98 ST p2, [sp, #-16]
+ 14274 0xfd 0x87 0xb0 0x03 0xb3 0x60 0x70 0x02 ST lr, [sp, #-20]; MOV p7, p3
+ 14282 0x00 0x00 NOPX
+ 14284 0x00 0x00 NOPX
+ 14286 0x00 0x00 NOPX
+ 14288 0x80 0x1c 0xb0 0x40 0x01 0x84 JNZ r16, #14688
+.delay_slot
+ 14294 0x0f 0xe8 0x1d 0x98 ST p0, [sp, #-24]
+.delay_slot
+ 14298 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17
+.delay_slot
+ 14302 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2
+.delay_slot
+ 14306 0x00 0x07 0xcc 0xc8 0xa0 0x44 MOVXM p6, #509008
+.delay_slot
+ 14312 0x0e 0x06 0x31 0x98 ST r17, [p6]
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 0x00 0x20 0x00 0x00 0x01 0xf3 0x32 0x34 0x10 0xba MOVA r0, #1; MOVXM p6, #509032
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 0xc0 0xc0 0xe6 0x84 0x8b 0x00 0x01 0xf0 0x32 0x32 0x10 0x76 ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 0x00 0x01 0x00 0x00 0x01 0xf0 0xb3 0x00 0x10 0xba MOVA r1, #0; MOVXM p1, #509440
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 0x00 0x00 NOPX
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 0x10 0x20 0x31 0x18 MOVX r16, #12
+.delay_slot
+ 14362 0x00 0x2c 0xf0 0x40 0x0a 0x2c NOPA; MOVX r16, #1
+.delay_slot
+ 14368 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV
+.return_address
+ 14384 0x04 0x00 0xa1 0x01 0x01 0x64 MOVX r16, #1; MOV dj0, #64
+ 14390 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 14394 0x00 0x00 NOPX
+ 14396 0x00 0x00 NOPX
+ 14398 0x00 0x00 NOPX
+ 14400 0x00 0x00 NOPX
+ 14402 0x00 0x00 NOPX
+ 14404 0x00 0x00 NOPX
+ 14406 0x14 0xa1 0x07 0x98 EQ r16, r18, r16
+ 14410 0x80 0x1c 0x68 0x40 0x01 0x84 JNZ r16, #14544
+.delay_slot
+ 14416 0x1c 0x5e 0xc0 0xf8 MOV r17, p7
+.delay_slot
+ 14420 0x18 0xc8 0x90 0x18 ADD.NC dc0, r17, #32
+.delay_slot
+.swstall delay_slot
+ 14424 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14426 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14428 0x00 0x00 NOPX
+ 14430 0x90 0x1c 0x58 0x40 0x01 0x84 JNZ r18, #14512
+.delay_slot
+ 14436 0x00 0x07 0xc8 0x2c 0x00 0x44 MOVXM r16, #509440
+.delay_slot
+ 14442 0x10 0x22 0x01 0x18 MOVX r17, #0
+.delay_slot
+.swstall delay_slot
+ 14446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14450 0x00 0x00 NOPX
+.no_stack_arguments
+ 14452 0xfc 0xe3 0xb0 0x00 0x05 0x70 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11136
+.delay_slot
+ 14462 0x00 0x07 0xcc 0xca 0x80 0x44 MOVXM p6, #509248
+.delay_slot
+ 14468 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248
+.delay_slot
+ 14474 0x19 0x61 0x80 0xf8 MOV p1, dc0
+.delay_slot
+.swstall delay_slot
+ 14478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV
+.return_address
+ 14496 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440
+ 14506 0xfc 0xe3 0x20 0x00 0x20 0x3c LDA p6, [sp, #-28]; NOPB
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 0x00 0x00 NOPX
+ 14514 0x00 0x1c 0x80 0x00 0x00 0x84 J #14592
+.delay_slot
+.swstall delay_slot
+ 14520 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14522 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14524 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14526 0x00 0x00 NOPX
+.delay_slot
+ 14528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xb6 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.no_stack_arguments
+ 14544 0xfc 0xe3 0xb0 0x00 0x05 0x84 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11296
+.delay_slot
+ 14554 0x00 0x07 0xcc 0xcb 0x00 0x44 MOVXM p6, #509312
+.delay_slot
+ 14560 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312
+.delay_slot
+ 14566 0x19 0x61 0x80 0xf8 MOV p1, dc0
+.delay_slot
+.swstall delay_slot
+ 14570 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14572 0x00 0x01 0x67 0x98 NOPA
+.return_address
+ 14576 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440
+ 14586 0xfc 0x93 0x20 0x00 0x20 0x3c LDA p1, [sp, #-28]; NOPB
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 0x1b 0x68 0x05 0x98 ADD.NC p3, r16, #11
+ 14596 0x6f 0xcd 0x50 0x00 0x01 0xf3 0x32 0x28 0x10 0xba LDA.u8 r19, [p3], #7; MOVXM p6, #509008
+ 14606 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 14610 0x03 0x1e 0xba 0x98 LDA.u16 r21, [p3], #2
+ 14614 0x03 0x06 0x1a 0x98 LDA.u16 r16, [p3]
+ 14618 0x00 0x00 NOPX
+ 14620 0x03 0x16 0x9a 0x98 LDA.u16 r20, [p3, #2]
+ 14624 0x00 0x00 NOPX
+ 14626 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 0x00 0x07 0xc0 0xc8 0x88 0x44 MOVXM p0, #508996
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 0x14 0xe7 0x5f 0x98 MUL r19, r19, r21
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 0x00 0xce 0x30 0x00 0x01 0xf1 0x32 0x2e 0x11 0x3a ST r19, [p0]; MOVXM p2, #509020
+ 14648 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16
+ 14652 0x14 0x63 0x2f 0x98 MUL r17, r17, r18
+ 14656 0x15 0x21 0x0f 0x98 MUL r16, r20, r16
+ 14660 0x00 0x2c 0xf2 0x06 0x31 0x80 0x01 0xf3 0x32 0x30 0x10 0x76 NOPA; ST r17, [p2]; MOVXM p6, #509024
+ 14672 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+ 14688 0x00 0x07 0xc0 0xc8 0x90 0x44 MOVXM p0, #509000
+ 14694 0x00 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r16, [p0]; MOVXM p2, #508992
+ 14704 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x26 0x10 0xba LDA r17, [p2]; MOVXM p6, #509004
+ 14714 0x06 0x06 0x56 0x98 LDA r18, [p6]
+ 14718 0x00 0x00 NOPX
+ 14720 0x00 0x00 NOPX
+ 14722 0x00 0x00 NOPX
+ 14724 0x00 0x00 NOPX
+ 14726 0x80 0x1c 0xf8 0x40 0x01 0x84 JNZ r16, #14832
+.delay_slot
+ 14732 0x8c 0x40 0xe9 0xb0 0x01 0x24 ADD r17, r17, #1; ADD.NC r19, r16, #1
+.delay_slot
+ 14738 0x14 0xa4 0x07 0x18 ADD r18, r18, #1
+.delay_slot
+ 14742 0x0a 0x06 0x31 0x98 ST r17, [p2]
+.delay_slot
+ 14746 0x0e 0x06 0x51 0x98 ST r18, [p6]
+.delay_slot
+ 14750 0x08 0x06 0x71 0x98 ST r19, [p0]
+ 14754 0x07 0xf6 0x31 0x18 LDA r17, [sp, #-12]
+ 14758 0x00 0x00 NOPX
+ 14760 0x00 0x00 NOPX
+ 14762 0x00 0x00 NOPX
+ 14764 0x00 0x00 NOPX
+ 14766 0x00 0x00 NOPX
+ 14768 0x00 0x00 NOPX
+ 14770 0x1e 0x68 0x86 0x18 ADD.NC p6, r17, #12
+ 14774 0x06 0xff 0x76 0x98 LDA r27, [p6], #-4
+ 14778 0x06 0xfe 0x36 0x98 LDA r17, [p6], #-4
+ 14782 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 0x06 0x46 0x36 0x98 LDA r17, [p6, #16]
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 0xc0 0xc6 0x3f 0xc1 0xfa 0x5c ST r17, [p6]; MOVX r16, #-1
+ 14810 0x00 0x00 NOPX
+ 14812 0x00 0x00 NOPX
+ 14814 0x00 0x00 NOPX
+ 14816 0x00 0x00 NOPX
+ 14818 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 0x00 0x00 NOPX
+ 14834 0x00 0x00 NOPX
+ 14836 0x00 0x00 NOPX
+ 14838 0x07 0xf5 0x19 0x18 LDA p2, [sp, #-12]
+ 14842 0x07 0xe8 0x19 0x18 LDA p0, [sp, #-24]
+.no_stack_arguments
+ 14846 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464
+.delay_slot
+ 14852 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440
+.delay_slot
+.swstall delay_slot
+ 14858 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14860 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14862 0x00 0x00 NOPX
+.delay_slot
+ 14864 0x00 0x2c 0xf0 0x00 0x26 0x88 0x8b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV
+.return_address
+ 14880 0xfe 0x42 0x20 0x00 0x01 0xf0 0xb2 0x24 0x10 0xba LDA r16, [sp, #-16]; MOVXM p1, #509000
+ 14890 0x20 0xc6 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r17, [p1]; MOVXM p1, #508996
+ 14900 0x01 0x06 0x56 0x98 LDA r18, [p1]
+ 14904 0x00 0x00 NOPX
+ 14906 0x00 0x00 NOPX
+ 14908 0x00 0x00 NOPX
+ 14910 0x00 0x00 NOPX
+ 14912 0x00 0x00 NOPX
+ 14914 0x00 0x00 NOPX
+ 14916 0x14 0x63 0x28 0x98 NE r17, r17, r18
+ 14920 0x88 0x1d 0xd0 0x40 0x01 0x84 JNZ r17, #15264
+.delay_slot
+.swstall delay_slot
+ 14926 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14928 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14930 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14932 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 14934 0x00 0x00 NOPX
+ 14936 0x08 0x02 0x80 0x3f 0x17 0xe8 0xb4 0x03 0x08 0xba MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12
+ 14946 0x3f 0xee 0xd0 0x00 0x01 0xf0 0x32 0x2e 0x10 0xba LDA r27, [p1], #-4; MOVXM p0, #509020
+ 14956 0x01 0xfe 0x56 0x98 LDA r18, [p1], #-4
+ 14960 0x01 0xfe 0x76 0x98 LDA r19, [p1], #-4
+ 14964 0x01 0x56 0x96 0x98 LDA r20, [p1, #20]
+ 14968 0x00 0x00 NOPX
+ 14970 0x00 0x00 NOPX
+ 14972 0x00 0x00 NOPX
+ 14974 0x00 0x00 NOPX
+ 14976 0x00 0x00 NOPX
+ 14978 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27
+ 14982 0x20 0xca 0x30 0x40 0x0a 0x5c ST r18, [p1]; MOVX r16, #1
+ 14988 0x00 0x00 NOPX
+ 14990 0x00 0x00 NOPX
+ 14992 0x00 0x00 NOPX
+ 14994 0x00 0x00 NOPX
+ 14996 0x15 0x13 0x18 0x18 ACQ r20, r17
+ 15000 0x00 0x00 NOPX
+ 15002 0x00 0x00 NOPX
+ 15004 0x00 0x00 NOPX
+ 15006 0x00 0x06 0x76 0x98 LDA r19, [p0]
+ 15010 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0]
+ 15014 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 0x06 0x5c 0x1e 0x98 LDA p0, [p6], #20
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 0x00 0x00 NOPX
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 0x29 0xc6 0xd0 0x27 0x38 0x6c 0x31 0x60 0x78 0xba LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 0x14 0xa1 0x07 0x98 EQ r16, r18, r16
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 0x80 0x1d 0x88 0x40 0x01 0x84 JNZ r16, #15120
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 0x0f 0x80 0x8b 0x18 MOVS p7, p0
+.delay_slot
+.swstall delay_slot
+ 15050 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15052 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15054 0x00 0x00 NOPX
+.delay_slot
+ 15056 0xfe 0x13 0xb0 0x00 0xb4 0xe2 0xa0 0x02 ST p1, [sp, #-16]; ADD.NC p1, r19, r17
+ 15064 0x90 0x1d 0x98 0x40 0x01 0x84 JNZ r18, #15152
+.delay_slot
+.swstall delay_slot
+ 15070 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15072 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15074 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15076 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15078 0x00 0x00 NOPX
+.no_stack_arguments
+ 15080 0x00 0x15 0xf8 0x00 0x01 0x04 JL #11248
+.delay_slot
+ 15086 0x00 0x07 0xc6 0xca 0x80 0x44 MOVXM p3, #509248
+.delay_slot
+.swstall delay_slot
+ 15092 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15094 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15096 0x00 0x00 NOPX
+.delay_slot
+ 15098 0x00 0x2c 0xf4 0xc1 0x81 0xd4 NOPA; MOV p2, p0
+.return_address
+ 15104 0x00 0x1d 0x98 0x00 0x00 0x84 J #15152
+.delay_slot
+.swstall delay_slot
+ 15110 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15112 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15114 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15116 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15118 0x00 0x00 NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.no_stack_arguments
+ 15120 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440
+.delay_slot
+ 15126 0x00 0x07 0xc6 0xcb 0x00 0x44 MOVXM p3, #509312
+.delay_slot
+ 15132 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+.delay_slot
+.swstall delay_slot
+ 15136 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15138 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15140 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.return_address
+ 15152 0x07 0xf0 0x99 0x18 LDA p1, [sp, #-16]
+ 15156 0xfe 0x83 0x20 0x44 0x0a 0x2c LDA p0, [sp, #-12]; MOVX r17, #1
+ 15162 0xe8 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x24 0x10 0xba LDA r16, [p7, #16]; MOVXM p7, #509000
+ 15172 0x00 0x00 NOPX
+ 15174 0x00 0x00 NOPX
+ 15176 0x00 0x00 NOPX
+ 15178 0x00 0x00 NOPX
+ 15180 0x00 0x00 NOPX
+ 15182 0x00 0x00 NOPX
+ 15184 0x14 0x11 0x18 0x18 REL r16, r17
+ 15188 0x01 0xf6 0x56 0x98 LDA r18, [p1, #-4]
+ 15192 0x00 0x56 0x16 0x98 LDA r16, [p0, #20]
+ 15196 0x00 0x00 NOPX
+ 15198 0x00 0x00 NOPX
+ 15200 0x00 0x00 NOPX
+ 15202 0x00 0x00 NOPX
+ 15204 0x00 0x00 NOPX
+ 15206 0x14 0x65 0x21 0x98 SUB r18, r17, r18
+ 15210 0x09 0xf6 0x51 0x98 ST r18, [p1, #-4]
+ 15214 0x00 0x00 NOPX
+ 15216 0x00 0x00 NOPX
+ 15218 0x00 0x00 NOPX
+ 15220 0x00 0x00 NOPX
+ 15222 0x14 0x11 0x18 0x18 REL r16, r17
+ 15226 0x06 0xe6 0x56 0x98 LDA r18, [p6, #-8]
+ 15230 0x00 0x00 NOPX
+ 15232 0x00 0x00 NOPX
+ 15234 0x00 0x1d 0xd8 0x00 0x00 0x84 J #15280
+.delay_slot
+.swstall delay_slot
+ 15240 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15242 0x00 0x00 NOPX
+.delay_slot
+ 15244 0x10 0x20 0x01 0x18 MOVX r16, #0
+.delay_slot
+ 15248 0xe0 0xc2 0x38 0xc6 0x43 0x5c ST r16, [p7]; SUB r17, r17, r18
+.delay_slot
+ 15254 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+ 15264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+ 15280 0xfd 0x87 0x20 0x00 0x01 0xf3 0xb2 0x30 0x10 0xba LDA lr, [sp, #-20]; MOVXM p7, #509024
+ 15290 0xe0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r18, [p7]; MOVXM p6, #508992
+ 15300 0x06 0x06 0x36 0x98 LDA r17, [p6]
+ 15304 0x00 0x00 NOPX
+ 15306 0x00 0x00 NOPX
+ 15308 0x00 0x00 NOPX
+ 15310 0x00 0x00 NOPX
+ 15312 0x00 0x00 NOPX
+ 15314 0x00 0x00 NOPX
+ 15316 0x14 0x63 0x28 0x98 NE r17, r17, r18
+ 15320 0x88 0x1d 0xf8 0x40 0x01 0x84 JNZ r17, #15344
+.delay_slot
+ 15326 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8]
+.delay_slot
+.swstall delay_slot
+ 15330 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15332 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15334 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15336 0x00 0x00 NOPX
+ 15338 0x00 0x2c 0xfc 0x0c 0x23 0x0c NOPA; ST r16, [p6]
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4]
+ 15348 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 15352 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64
+.delay_slot
+.swstall delay_slot
+ 15358 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15360 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15362 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15364 0x00 0x00 NOPX
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+
+.text_segment PM 15376
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function_start
+ 15376 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15380 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 15384 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 15388 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 15392 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15396 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880
+.delay_slot
+.swstall delay_slot
+ 15402 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15404 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15406 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15408 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15410 0x00 0x00 NOPX
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+
+.text_segment PM 15424
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function_start
+ 15424 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15428 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15432 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15436 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15440 0x00 0x10 0x18 0x00 0x00 0x84 J #8240
+.delay_slot
+.swstall delay_slot
+ 15446 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15448 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15450 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15452 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15454 0x00 0x00 NOPX
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function_start
+ 15456 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15460 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15464 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15468 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15472 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104
+.delay_slot
+.swstall delay_slot
+ 15478 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15480 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15482 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15484 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15486 0x00 0x00 NOPX
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function_start
+ 15488 0x19 0x60 0xc0 0xf8 MOV p1, p0
+ 15492 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8
+ 15496 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4]
+ 15500 0x01 0x04 0x9e 0x98 LDA p1, [p1]
+.tail_call
+ 15504 0x00 0x14 0x88 0x00 0x00 0x84 J #10512
+.delay_slot
+.swstall delay_slot
+ 15510 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15512 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15514 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15516 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15518 0x00 0x00 NOPX
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function_start
+ 15520 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15524 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12
+ 15528 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8
+ 15532 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4]
+ 15536 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15540 0x00 0x16 0xf0 0x00 0x00 0x84 J #11744
+.delay_slot
+.swstall delay_slot
+ 15546 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15548 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15550 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15552 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15554 0x00 0x00 NOPX
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+
+.text_segment PM 15568
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function_start
+ 15568 0x1b 0x60 0xc0 0xf8 MOV p3, p0
+ 15572 0x03 0x1c 0x1e 0x98 LDA p0, [p3], #4
+ 15576 0x03 0x1c 0x9e 0x98 LDA p1, [p3], #4
+ 15580 0x03 0x2d 0x1e 0x98 LDA p2, [p3], #8
+ 15584 0x03 0xf6 0x1e 0x98 LDA p4, [p3, #-4]
+ 15588 0x03 0x05 0x9e 0x98 LDA p3, [p3]
+.tail_call
+ 15592 0x00 0x1b 0xd0 0x00 0x00 0x84 J #14240
+.delay_slot
+.swstall delay_slot
+ 15598 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15600 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15602 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15604 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15606 0x00 0x00 NOPX
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+
+.text_segment PM 15616
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function_start
+ 15616 0x1a 0x60 0xc0 0xf8 MOV p2, p0
+ 15620 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4
+ 15624 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8
+ 15628 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4]
+ 15632 0x02 0x05 0x1e 0x98 LDA p2, [p2]
+.tail_call
+ 15636 0x00 0x1a 0xe0 0x00 0x00 0x84 J #13760
+.delay_slot
+.swstall delay_slot
+ 15642 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15644 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15646 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15648 0x00 0x00 NOPX
+.delay_slot
+.swstall delay_slot
+ 15650 0x00 0x00 NOPX
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+
+.text_segment PM 15664
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function_start
+ 15664 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0
+ 15670 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15674 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15678 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15682 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15686 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15690 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15694 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15698 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15702 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15706 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15710 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15714 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15718 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15722 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15726 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15730 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15734 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15738 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15742 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15746 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15750 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15754 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15758 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15762 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15766 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15770 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15774 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15778 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+ 15782 0x10 0x28 0x00 0x18 RET lr
+.delay_slot
+ 15786 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15790 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15794 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15798 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1
+.delay_slot
+ 15802 0x18 0x9f 0xa0 0xf8 MOV r2, r31
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+
+.bss_segment DMb 508992 32
+
+.data_segment DMb 509024
+.label _ZL8num_iter
+ 0x1
+ 0x0
+ 0x0
+ 0x0
+
+.bss_segment DMb 509028 4
+
+.bss_segment DMb 509032 1
+
+.rodata_segment DMb 509056
+.label _ZL20g_uniformKernelFuncs
+ 0x10
+ 0x3c
+ 0x0
+ 0x0
+ 0x40
+ 0x3c
+ 0x0
+ 0x0
+ 0x60
+ 0x3c
+ 0x0
+ 0x0
+ 0x80
+ 0x3c
+ 0x0
+ 0x0
+ 0xa0
+ 0x3c
+ 0x0
+ 0x0
+ 0xd0
+ 0x3c
+ 0x0
+ 0x0
+ 0x0
+ 0x3d
+ 0x0
+ 0x0
+
+.bss_segment DMb 509120 1024
+
+.stack DM_stack 506560 508928
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.map
new file mode 100644
index 0000000000000000000000000000000000000000..b11a3b333f5cabeaaee231f81abbc9a33f2e051a
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.map
@@ -0,0 +1,324 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+Memory map for memory 'DM_stack':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 2368
+
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+
+Memory map for memory 'DMb':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 3461
+
+ 0x00000000..0x0007babf ( 506560 items) : Reserved
+ 0x0007bac0..0x0007c3ff ( 2368 items) : Stack
+ 0x0007c400..0x0007c43f ( 64 items) : Reserved
+ 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL9curr_iter (Data, Local, .bss.DMb.4)
+ 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10depth_iter (Data, Local, .bss.DMb.4)
+ 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11total_iters (Data, Local, .bss.DMb.4)
+ 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8core_row (Data, Local, .bss.DMb.4)
+ 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4)
+ 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4)
+ 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4)
+ 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8num_iter (Data, Local, .data.DMb.4)
+ 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4)
+ 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1)
+ 0x0007c480..0x0007c49b ( 28 items) : ../Release/0_0_reloadable5.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64)
+
+ Called functions : _Z13_b896_wrapperPPv
+ _Z13_b901_wrapperPPv
+ _Z13_b906_wrapperPPv
+ _Z13_b881_wrapperPPv
+ _Z13_b891_wrapperPPv
+ _Z13_b924_wrapperPPv
+ _Z13_b919_wrapperPPv
+
+ 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable5.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64)
+ 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable5.o::add1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c5c0..0x0007c5ff ( 64 items) : ../Release/0_0_reloadable5.o::clip1d_params (Data, Global, .bss.DMb.64)
+ 0x0007c600..0x0007c7bf ( 448 items) : ../Release/0_0_reloadable5.o::conv2d_params (Data, Global, .bss.DMb.64)
+ 0x0007c7c0..0x0007c8bf ( 256 items) : ../Release/0_0_reloadable5.o::conv2d_dw_params (Data, Global, .bss.DMb.64)
+ 0x0007ccc0..0x000fffff ( 537408 items) : Reserved
+
+Memory map for memory 'PM':
+
+ Size = 1048576
+ Width = 8 bits
+ Offset = 0
+ Used = 13150
+
+ 0x00000000..0x0000092f ( 2352 items) : Reserved
+ 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable5.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64)
+
+ Referenced symbols: _ZL20g_uniformKernelFuncs
+
+ 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable5.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64)
+ 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable5.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable5.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable5.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ conv2d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL14num_depth_iter
+ _ZL8num_iter
+ _ZL10depth_iter
+ _ZL11total_iters
+
+ 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+
+ 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+
+ 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ add1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable5.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+ _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ clip1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+
+ 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable5.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+
+ 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_attribute_broadcasting_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL8num_iter
+
+ 0x00002b00..0x00002b73 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0)
+ 0x00002b80..0x00002be1 ( 98 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+
+ 0x00002bf0..0x00002bff ( 16 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0)
+
+ Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+
+ 0x00002c00..0x00002c17 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+ 0x00002c20..0x00002ca9 ( 138 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+
+ 0x00002cb0..0x00002dd3 ( 292 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x00002de0..0x00003039 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ mul1d_params
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL11ifm1_offset
+ _ZL11ifm2_offset
+ _ZL8num_iter
+
+ 0x00003040..0x000032df ( 672 items) : ../Release/0_0_reloadable5.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64)
+
+ Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_
+
+ Referenced symbols: conv2d_dw_params
+ _ZN12me_primitive11control_rndE
+
+ 0x000032e0..0x000035b1 ( 722 items) : ../Release/0_0_reloadable5.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0)
+
+ Referenced symbols: _ZN12me_primitive11control_rndE
+
+ 0x000035c0..0x0000379d ( 478 items) : ../Release/0_0_reloadable5.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128)
+
+ Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+ _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ _ZL8num_iter
+ _ZL10ifmsv_size
+ conv2d_dw_params
+
+ 0x000037a0..0x00003c05 ( 1126 items) : ../Release/0_0_reloadable5.o::_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE (Function, Global, .text) (stack frame size = 64)
+
+ Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+ _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+ _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+ _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+
+ Referenced symbols: _ZL9curr_iter
+ _ZL8core_row
+ _ZN12me_primitive11control_rndE
+ _ZN12me_primitive11control_satE
+ conv2d_params
+ add1d_params
+ mul1d_params
+ _ZL14num_depth_iter
+ _ZL11ifm2_offset
+ _ZL8num_iter
+ _ZL10depth_iter
+ _ZL11total_iters
+
+ 0x00003c10..0x00003c33 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c40..0x00003c5f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c60..0x00003c7f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003c80..0x00003c9f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003ca0..0x00003cc3 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+
+ 0x00003cd0..0x00003cf7 ( 40 items) : ../Release/0_0_reloadable5.o::_Z13_b924_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+
+ 0x00003d00..0x00003d23 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0)
+
+ Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+
+ 0x00003d30..0x00003dbd ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0)
+
+External symbols:
+
+ __dso_handle = 0x0
+ _ctors_end = 0x0
+ _ctors_start = 0x0
+ _dtors_end = 0x0
+ _dtors_start = 0x0
+ _pc_end = 0x3dbe
+ _pc_start = 0x930
+ _sp_end_DM_stack = 0x7c400
+ _sp_start_DM_stack = 0x7bac0
+
+Section summary for memory 'DM_stack':
+
+ .stack File
+ ---------- ----------
+ 2368
+ ---------- ----------
+ 2368 Total
+
+Section summary for memory 'DMb':
+
+ .bss .data .rodata File
+ ---------- ---------- ---------- ----------
+ 1056 4 28 ../Release/0_0_reloadable5.o
+ 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ---------- ---------- ----------
+ 1061 4 28 Total
+
+Section summary for memory 'PM':
+
+ .text File
+ ---------- ----------
+ 13008 ../Release/0_0_reloadable5.o
+ 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ ---------- ----------
+ 13150 Total
+
+File summary:
+
+../Release/0_0_reloadable5.o
+ DMb 1088
+ PM 13008
+
+me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ DMb 5
+
+me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)
+ PM 142
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.sdr
new file mode 100644
index 0000000000000000000000000000000000000000..029eac6b3129d1ccada1bf5bd7decb96296f96f7
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.sdr
@@ -0,0 +1,129 @@
+
+// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme
+
+// Release: ipp V-2024.06-TGT-241219
+
+// Symbols in memory 'DM_bankA':
+// Symbols in memory 'DM_bankAB':
+// Symbols in memory 'DM_bankAC':
+// Symbols in memory 'DM_bankAD':
+// Symbols in memory 'DM_bankB':
+// Symbols in memory 'DM_bankBC':
+// Symbols in memory 'DM_bankBD':
+// Symbols in memory 'DM_bankC':
+// Symbols in memory 'DM_bankCD':
+// Symbols in memory 'DM_bankD':
+// Symbols in memory 'DM_stack':
+// Symbols in memory 'DM_test':
+// Symbols in memory 'DMb':
+_symbol _ZN12me_primitive11control_satE 0x0007c464
+_symbol _ZN12me_primitive11control_rndE 0x0007c468
+_symbol add1d_attribute_broadcasting_params 0x0007c4c0
+_symbol mul1d_attribute_broadcasting_params 0x0007c500
+_symbol add1d_params 0x0007c540
+_symbol mul1d_params 0x0007c580
+_symbol clip1d_params 0x0007c5c0
+_symbol conv2d_params 0x0007c600
+_symbol conv2d_dw_params 0x0007c7c0
+// Symbols in memory 'DMh':
+// Symbols in memory 'DMh_bankA':
+// Symbols in memory 'DMh_bankAB':
+// Symbols in memory 'DMh_bankAC':
+// Symbols in memory 'DMh_bankAD':
+// Symbols in memory 'DMh_bankB':
+// Symbols in memory 'DMh_bankBC':
+// Symbols in memory 'DMh_bankBD':
+// Symbols in memory 'DMh_bankC':
+// Symbols in memory 'DMh_bankCD':
+// Symbols in memory 'DMh_bankD':
+// Symbols in memory 'DMh_stack':
+// Symbols in memory 'DMs':
+// Symbols in memory 'DMs_bankA':
+// Symbols in memory 'DMs_bankAB':
+// Symbols in memory 'DMs_bankAC':
+// Symbols in memory 'DMs_bankAD':
+// Symbols in memory 'DMs_bankB':
+// Symbols in memory 'DMs_bankBC':
+// Symbols in memory 'DMs_bankBD':
+// Symbols in memory 'DMs_bankC':
+// Symbols in memory 'DMs_bankCD':
+// Symbols in memory 'DMs_bankD':
+// Symbols in memory 'DMs_stack':
+// Symbols in memory 'DMv':
+// Symbols in memory 'DMv_bankA':
+// Symbols in memory 'DMv_bankAB':
+// Symbols in memory 'DMv_bankAC':
+// Symbols in memory 'DMv_bankAD':
+// Symbols in memory 'DMv_bankB':
+// Symbols in memory 'DMv_bankBC':
+// Symbols in memory 'DMv_bankBD':
+// Symbols in memory 'DMv_bankC':
+// Symbols in memory 'DMv_bankCD':
+// Symbols in memory 'DMv_bankD':
+// Symbols in memory 'DMv_stack':
+// Symbols in memory 'DMw':
+// Symbols in memory 'DMw_bankA':
+// Symbols in memory 'DMw_bankAB':
+// Symbols in memory 'DMw_bankAC':
+// Symbols in memory 'DMw_bankAD':
+// Symbols in memory 'DMw_bankB':
+// Symbols in memory 'DMw_bankBC':
+// Symbols in memory 'DMw_bankBD':
+// Symbols in memory 'DMw_bankC':
+// Symbols in memory 'DMw_bankCD':
+// Symbols in memory 'DMw_bankD':
+// Symbols in memory 'DMw_stack':
+// Symbols in memory 'DMx':
+// Symbols in memory 'DMx_bankA':
+// Symbols in memory 'DMx_bankAB':
+// Symbols in memory 'DMx_bankAC':
+// Symbols in memory 'DMx_bankAD':
+// Symbols in memory 'DMx_bankB':
+// Symbols in memory 'DMx_bankBC':
+// Symbols in memory 'DMx_bankBD':
+// Symbols in memory 'DMx_bankC':
+// Symbols in memory 'DMx_bankCD':
+// Symbols in memory 'DMx_bankD':
+// Symbols in memory 'DMx_stack':
+// Symbols in memory 'PM':
+_symbol _Z13kernelWrapperPPvjjjj 0x00000930
+_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0
+_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060
+_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170
+_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20
+_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30
+_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70
+_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0
+_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220
+_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290
+_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600
+_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870
+_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x00002b00
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00002b80
+_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00002bf0
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002c00
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002c20
+_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002cb0
+_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002de0
+_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000032e0
+_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000035c0
+_symbol _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE 0x000037a0
+_symbol _Z13_b896_wrapperPPv 0x00003c10
+_symbol _Z13_b901_wrapperPPv 0x00003c40
+_symbol _Z13_b906_wrapperPPv 0x00003c60
+_symbol _Z13_b881_wrapperPPv 0x00003c80
+_symbol _Z13_b891_wrapperPPv 0x00003ca0
+_symbol _Z13_b924_wrapperPPv 0x00003cd0
+_symbol _Z13_b919_wrapperPPv 0x00003d00
+_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003d30
+// Symbols in memory 'PMw':
+// Symbols in memory 'TM4':
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.srv
new file mode 100644
index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.srv
@@ -0,0 +1,19187 @@
+
+// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025
+// Copyright 2014-2024 Synopsys, Inc. All rights reserved.
+// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me
+
+// Release: ipp V-2024.06-TGT-241219
+.label __Z13kernelWrapperPPvjjjj___func_begin0
+.label _Z13kernelWrapperPPvjjjj
+.function kernelWrapper _Z13kernelWrapperPPvjjjj
+.src_ref 0 "0_0_reloadable5.cc" 94 first
+.src_ref 0 "0_0_reloadable5.cc" 96 60 first
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.function_start
+ 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2353 "01000001" // /* MW 5 */
+ 2354 "00100001" // /* MW 4 */
+ 2355 "11010001" // /* MW 3 */
+ 2356 "11000110" // /* MW 2 */
+ 2357 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 94
+ 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2359 "00000001" // /* MW 5 */
+ 2360 "00000000" // /* MW 4 */
+ 2361 "00000000" // /* MW 3 */
+ 2362 "00001000" // /* MW 2 */
+ 2363 "00000000" // /* MW 1 */
+ 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2365 "01110000" // /* MW 7 */
+ 2366 "11010000" // /* MW 6 */
+ 2367 "00101011" // /* MW 5 */
+ 2368 "00000000" // /* MW 4 */
+ 2369 "10110000" // /* MW 3 */
+ 2370 "11110011" // /* MW 2 */
+ 2371 "11111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2373 "01110000" // /* MW 7 */
+ 2374 "10010000" // /* MW 6 */
+ 2375 "11101000" // /* MW 5 */
+ 2376 "00000001" // /* MW 4 */
+ 2377 "10110000" // /* MW 3 */
+ 2378 "10000111" // /* MW 2 */
+ 2379 "11111111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 96 110 first
+ 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2381 "11100000" // /* MW 5 */
+ 2382 "11000001" // /* MW 4 */
+ 2383 "10110111" // /* MW 3 */
+ 2384 "00000110" // /* MW 2 */
+ 2385 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2387 "00100000" // /* MW 3 */
+ 2388 "10011000" // /* MW 2 */
+ 2389 "00011110" // /* MW 1 */
+ 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2391 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2393 "10000010" // /* MW 3 */
+ 2394 "01101000" // /* MW 2 */
+ 2395 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2397 "00110110" // /* MW 3 */
+ 2398 "00011110" // /* MW 2 */
+ 2399 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2401 "01110110" // /* MW 3 */
+ 2402 "00111110" // /* MW 2 */
+ 2403 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2405 "01010110" // /* MW 3 */
+ 2406 "11101110" // /* MW 2 */
+ 2407 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2409 "01110110" // /* MW 3 */
+ 2410 "00000111" // /* MW 2 */
+ 2411 "00000111" // /* MW 1 */
+ 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2413 "00000000" // /* MW 1 */
+ 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2415 "00000000" // /* MW 1 */
+ 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2417 "00000000" // /* MW 1 */
+ 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2419 "00000000" // /* MW 1 */
+ 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2421 "00000000" // /* MW 1 */
+ 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2423 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2425 "00110010" // /* MW 3 */
+ 2426 "01100011" // /* MW 2 */
+ 2427 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2429 "00110001" // /* MW 3 */
+ 2430 "11010110" // /* MW 2 */
+ 2431 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8
+.src_ref 1 "io_buffer_main.h" 410 8
+ 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2433 "11111101" // /* MW 3 */
+ 2434 "11100010" // /* MW 2 */
+ 2435 "00010111" // /* MW 1 */
+ 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2437 "00000000" // /* MW 1 */
+ 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2439 "00000000" // /* MW 1 */
+ 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2441 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2443 "00011000" // /* MW 3 */
+ 2444 "10010111" // /* MW 2 */
+ 2445 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2447 "00001001" // /* MW 3 */
+ 2448 "00100100" // /* MW 2 */
+ 2449 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60 first
+ 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2451 "00101101" // /* MW 3 */
+ 2452 "00101001" // /* MW 2 */
+ 2453 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2455 "00100000" // /* MW 3 */
+ 2456 "10001010" // /* MW 2 */
+ 2457 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 60
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2459 "10001011" // /* MW 5 */
+ 2460 "11011000" // /* MW 4 */
+ 2461 "11011111" // /* MW 3 */
+ 2462 "01001110" // /* MW 2 */
+ 2463 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2465 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2467 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2469 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2471 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2473 "00000101" // /* MW 3 */
+ 2474 "00100110" // /* MW 2 */
+ 2475 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 98 110
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2477 "11111100" // /* MW 3 */
+ 2478 "11110100" // /* MW 2 */
+ 2479 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13 first
+ 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 2481 "00000000" // /* MW 7 */
+ 2482 "11000001" // /* MW 6 */
+ 2483 "10110100" // /* MW 5 */
+ 2484 "00000011" // /* MW 4 */
+ 2485 "10110000" // /* MW 3 */
+ 2486 "01101010" // /* MW 2 */
+ 2487 "11111110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 13
+ 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2489 "01110110" // /* MW 3 */
+ 2490 "00011110" // /* MW 2 */
+ 2491 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 36
+ 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2493 "10110110" // /* MW 3 */
+ 2494 "00111110" // /* MW 2 */
+ 2495 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 46
+ 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2497 "10010110" // /* MW 3 */
+ 2498 "11101110" // /* MW 2 */
+ 2499 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 590 60
+ 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2501 "01110110" // /* MW 3 */
+ 2502 "00000111" // /* MW 2 */
+ 2503 "00000111" // /* MW 1 */
+ 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2505 "00000000" // /* MW 1 */
+ 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2507 "00000000" // /* MW 1 */
+ 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2509 "00000000" // /* MW 1 */
+ 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2511 "00000000" // /* MW 1 */
+ 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2513 "00000000" // /* MW 1 */
+ 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2515 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 195 30 first
+.src_ref 1 "io_buffer_compiler.h" 195 37 first
+ 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2517 "01010010" // /* MW 3 */
+ 2518 "11100111" // /* MW 2 */
+ 2519 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 194 23 first
+ 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2521 "01110001" // /* MW 3 */
+ 2522 "11010110" // /* MW 2 */
+ 2523 "00001111" // /* MW 1 */
+ 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2525 "00000000" // /* MW 1 */
+ 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2527 "00000000" // /* MW 1 */
+ 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2529 "00000000" // /* MW 1 */
+ 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2531 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 410 8 first
+ 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2533 "00011000" // /* MW 3 */
+ 2534 "00010111" // /* MW 2 */
+ 2535 "00010101" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7 first
+ 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2537 "00101101" // /* MW 3 */
+ 2538 "00100011" // /* MW 2 */
+ 2539 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2541 "10100000" // /* MW 3 */
+ 2542 "10001000" // /* MW 2 */
+ 2543 "00011000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2545 "00000000" // /* MW 5 */
+ 2546 "11001001" // /* MW 4 */
+ 2547 "11001110" // /* MW 3 */
+ 2548 "00000111" // /* MW 2 */
+ 2549 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 7
+ 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2551 "00101011" // /* MW 5 */
+ 2552 "11010100" // /* MW 4 */
+ 2553 "11011111" // /* MW 3 */
+ 2554 "00010011" // /* MW 2 */
+ 2555 "11100000" // /* MW 1 */
+ 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2557 "00000000" // /* MW 1 */
+ 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2559 "00000000" // /* MW 1 */
+ 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2561 "00000000" // /* MW 1 */
+ 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2563 "00000000" // /* MW 1 */
+ 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2565 "00000000" // /* MW 1 */
+ 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2567 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 102 4
+.no_stack_arguments
+ 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */
+ 2569 "01000000" // /* MW 3 */
+ 2570 "00110000" // /* MW 2 */
+ 2571 "00010000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+.delay_slot
+ 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2573 "11000000" // /* MW 3 */
+ 2574 "01100000" // /* MW 2 */
+ 2575 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2577 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2579 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2581 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2583 "01111110" // /* MW 9 */
+ 2584 "10100101" // /* MW 8 */
+ 2585 "00000001" // /* MW 7 */
+ 2586 "00000000" // /* MW 6 */
+ 2587 "00010000" // /* MW 5 */
+ 2588 "00000000" // /* MW 4 */
+ 2589 "11110000" // /* MW 3 */
+ 2590 "00101100" // /* MW 2 */
+ 2591 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 105 60 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+.src_ref 1 "io_buffer_main.h" 440 8
+.return_address
+ 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2593 "00001010" // /* MW 5 */
+ 2594 "01000000" // /* MW 4 */
+ 2595 "11010000" // /* MW 3 */
+ 2596 "11000110" // /* MW 2 */
+ 2597 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2599 "01010001" // /* MW 3 */
+ 2600 "11101011" // /* MW 2 */
+ 2601 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60
+ 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2603 "01000001" // /* MW 3 */
+ 2604 "11101100" // /* MW 2 */
+ 2605 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_main.h" 440 8
+ 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2607 "00101001" // /* MW 3 */
+ 2608 "11110000" // /* MW 2 */
+ 2609 "00000111" // /* MW 1 */
+ 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2611 "00000000" // /* MW 1 */
+ 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2613 "00000000" // /* MW 1 */
+ 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+ 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2617 "10001000" // /* MW 3 */
+ 2618 "01101000" // /* MW 2 */
+ 2619 "00011001" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+ 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2621 "00110110" // /* MW 3 */
+ 2622 "00000110" // /* MW 2 */
+ 2623 "00000001" // /* MW 1 */
+ 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2625 "00000000" // /* MW 1 */
+ 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2627 "00000000" // /* MW 1 */
+ 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2629 "00000000" // /* MW 1 */
+ 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2631 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2633 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2635 "00011100" // /* MW 3 */
+ 2636 "10100000" // /* MW 2 */
+ 2637 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2639 "00001000" // /* MW 3 */
+ 2640 "01010101" // /* MW 2 */
+ 2641 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2643 "01000001" // /* MW 5 */
+ 2644 "10101111" // /* MW 4 */
+ 2645 "11011101" // /* MW 3 */
+ 2646 "11000110" // /* MW 2 */
+ 2647 "00111110" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 107 60 first
+ 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2649 "01010110" // /* MW 3 */
+ 2650 "00000010" // /* MW 2 */
+ 2651 "00000111" // /* MW 1 */
+ 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2653 "00000000" // /* MW 1 */
+ 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2655 "00000000" // /* MW 1 */
+ 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2657 "00000000" // /* MW 1 */
+ 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2659 "00000000" // /* MW 1 */
+ 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2661 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2663 "00010001" // /* MW 3 */
+ 2664 "00100111" // /* MW 2 */
+ 2665 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12 first
+.src_ref 1 "io_buffer_compiler.h" 606 24
+ 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2667 "00010000" // /* MW 5 */
+ 2668 "11010010" // /* MW 4 */
+ 2669 "01000000" // /* MW 3 */
+ 2670 "01100110" // /* MW 2 */
+ 2671 "10001100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 605 12
+.src_ref 1 "io_buffer_compiler.h" 606 22 first
+ 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2673 "01100011" // /* MW 5 */
+ 2674 "11101100" // /* MW 4 */
+ 2675 "11010011" // /* MW 3 */
+ 2676 "11000110" // /* MW 2 */
+ 2677 "00000000" // /* MW 1 */
+ 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2679 "00000000" // /* MW 1 */
+ 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2681 "00000000" // /* MW 1 */
+ 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2683 "00000000" // /* MW 1 */
+ 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2685 "00000000" // /* MW 1 */
+ 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2687 "00000000" // /* MW 1 */
+ 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2689 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 440 8 first
+ 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2691 "00001000" // /* MW 3 */
+ 2692 "01010101" // /* MW 2 */
+ 2693 "00010100" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2695 "00111001" // /* MW 3 */
+ 2696 "11111100" // /* MW 2 */
+ 2697 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+ 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2699 "00110110" // /* MW 3 */
+ 2700 "11110110" // /* MW 2 */
+ 2701 "00000000" // /* MW 1 */
+ 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2703 "10011001" // /* MW 3 */
+ 2704 "11110111" // /* MW 2 */
+ 2705 "00000111" // /* MW 1 */
+ 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2707 "11110001" // /* MW 3 */
+ 2708 "11111001" // /* MW 2 */
+ 2709 "00000111" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110 first
+ 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2711 "00000001" // /* MW 5 */
+ 2712 "00000000" // /* MW 4 */
+ 2713 "00000000" // /* MW 3 */
+ 2714 "11111000" // /* MW 2 */
+ 2715 "11111111" // /* MW 1 */
+ 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2717 "00000000" // /* MW 1 */
+ 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2719 "00000000" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 110
+ 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 2721 "00000000" // /* MW 3 */
+ 2722 "00101000" // /* MW 2 */
+ 2723 "00010000" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2725 "00011100" // /* MW 3 */
+ 2726 "11100000" // /* MW 2 */
+ 2727 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24 first
+.delay_slot
+ 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2729 "00010001" // /* MW 3 */
+ 2730 "00100001" // /* MW 2 */
+ 2731 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 24
+.delay_slot
+ 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2733 "00000010" // /* MW 3 */
+ 2734 "01100001" // /* MW 2 */
+ 2735 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_compiler.h" 606 22
+.delay_slot
+ 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2737 "00010001" // /* MW 3 */
+ 2738 "11110110" // /* MW 2 */
+ 2739 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13kernelWrapperPPvjjjj__end
+.label __Z13kernelWrapperPPvjjjj___func_end0
+ 2741 "00000000" // /* MW 1 */
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 432 first
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.function_start
+ 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2753 "01111000" // /* MW 9 */
+ 2754 "01100000" // /* MW 8 */
+ 2755 "01001001" // /* MW 7 */
+ 2756 "10001000" // /* MW 6 */
+ 2757 "01000000" // /* MW 5 */
+ 2758 "00000000" // /* MW 4 */
+ 2759 "11010000" // /* MW 3 */
+ 2760 "10000101" // /* MW 2 */
+ 2761 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+.src_ref 2 "conv2d_bf16_params.h" 438 17 first
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2763 "01001000" // /* MW 9 */
+ 2764 "10000010" // /* MW 8 */
+ 2765 "00110000" // /* MW 7 */
+ 2766 "11101001" // /* MW 6 */
+ 2767 "01010111" // /* MW 5 */
+ 2768 "00111110" // /* MW 4 */
+ 2769 "11010000" // /* MW 3 */
+ 2770 "10000001" // /* MW 2 */
+ 2771 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 432
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+ 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 2773 "01110000" // /* MW 9 */
+ 2774 "00000000" // /* MW 8 */
+ 2775 "00000000" // /* MW 7 */
+ 2776 "00000000" // /* MW 6 */
+ 2777 "00000010" // /* MW 5 */
+ 2778 "00000000" // /* MW 4 */
+ 2779 "00000000" // /* MW 3 */
+ 2780 "10000001" // /* MW 2 */
+ 2781 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+.src_ref 2 "conv2d_bf16_params.h" 458 30
+ 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2783 "01011000" // /* MW 11 */
+ 2784 "00010000" // /* MW 10 */
+ 2785 "00000000" // /* MW 9 */
+ 2786 "00101000" // /* MW 8 */
+ 2787 "00000000" // /* MW 7 */
+ 2788 "10000001" // /* MW 6 */
+ 2789 "10110101" // /* MW 5 */
+ 2790 "11111101" // /* MW 4 */
+ 2791 "00000111" // /* MW 3 */
+ 2792 "10000110" // /* MW 2 */
+ 2793 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 2795 "01011000" // /* MW 11 */
+ 2796 "00001111" // /* MW 10 */
+ 2797 "10001000" // /* MW 9 */
+ 2798 "10101010" // /* MW 8 */
+ 2799 "01010111" // /* MW 7 */
+ 2800 "10111111" // /* MW 6 */
+ 2801 "11010101" // /* MW 5 */
+ 2802 "11111001" // /* MW 4 */
+ 2803 "00000111" // /* MW 3 */
+ 2804 "01100011" // /* MW 2 */
+ 2805 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2807 "00000010" // /* MW 5 */
+ 2808 "01100000" // /* MW 4 */
+ 2809 "10110000" // /* MW 3 */
+ 2810 "10111110" // /* MW 2 */
+ 2811 "11111110" // /* MW 1 */
+ 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2815 "00101001" // /* MW 3 */
+ 2816 "00011100" // /* MW 2 */
+ 2817 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2819 "00001001" // /* MW 3 */
+ 2820 "00011100" // /* MW 2 */
+ 2821 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2823 "00101110" // /* MW 3 */
+ 2824 "00011100" // /* MW 2 */
+ 2825 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2827 "00001110" // /* MW 3 */
+ 2828 "00011100" // /* MW 2 */
+ 2829 "00000000" // /* MW 1 */
+ 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2831 "00000000" // /* MW 1 */
+ 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2833 "00000000" // /* MW 1 */
+ 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2835 "00000000" // /* MW 1 */
+ 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2837 "00000000" // /* MW 1 */
+ 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2839 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2841 "00101001" // /* MW 3 */
+ 2842 "00011100" // /* MW 2 */
+ 2843 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2845 "00001001" // /* MW 3 */
+ 2846 "00011100" // /* MW 2 */
+ 2847 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2849 "00101110" // /* MW 3 */
+ 2850 "00011100" // /* MW 2 */
+ 2851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2853 "00001110" // /* MW 3 */
+ 2854 "00011100" // /* MW 2 */
+ 2855 "00000000" // /* MW 1 */
+ 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2857 "00000000" // /* MW 1 */
+ 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2859 "00000000" // /* MW 1 */
+ 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2861 "00000000" // /* MW 1 */
+ 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2863 "00000000" // /* MW 1 */
+ 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2865 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2867 "00101001" // /* MW 3 */
+ 2868 "00011100" // /* MW 2 */
+ 2869 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2871 "00001001" // /* MW 3 */
+ 2872 "00011100" // /* MW 2 */
+ 2873 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2875 "00001110" // /* MW 3 */
+ 2876 "00000100" // /* MW 2 */
+ 2877 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 17
+ 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2879 "00101110" // /* MW 3 */
+ 2880 "00010100" // /* MW 2 */
+ 2881 "00000000" // /* MW 1 */
+ 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2883 "00000000" // /* MW 1 */
+ 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2885 "00000000" // /* MW 1 */
+ 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2887 "00000000" // /* MW 1 */
+ 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2889 "00000000" // /* MW 1 */
+ 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2891 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2893 "00001001" // /* MW 3 */
+ 2894 "00000100" // /* MW 2 */
+ 2895 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 438 15
+ 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2897 "00101001" // /* MW 3 */
+ 2898 "00010100" // /* MW 2 */
+ 2899 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 40 first
+ 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2901 "10101010" // /* MW 3 */
+ 2902 "11011101" // /* MW 2 */
+ 2903 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 447 34 first
+ 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2905 "00101010" // /* MW 3 */
+ 2906 "00011110" // /* MW 2 */
+ 2907 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 448 34 first
+ 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2909 "11001010" // /* MW 3 */
+ 2910 "10111101" // /* MW 2 */
+ 2911 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2913 "11111010" // /* MW 3 */
+ 2914 "11111101" // /* MW 2 */
+ 2915 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+ 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2917 "01101010" // /* MW 3 */
+ 2918 "00001010" // /* MW 2 */
+ 2919 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 20 first
+ 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2921 "11101010" // /* MW 3 */
+ 2922 "10101100" // /* MW 2 */
+ 2923 "00000010" // /* MW 1 */
+ 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 2925 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+ 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2927 "00011101" // /* MW 3 */
+ 2928 "01000010" // /* MW 2 */
+ 2929 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+ 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2931 "00000001" // /* MW 5 */
+ 2932 "00110001" // /* MW 4 */
+ 2933 "11111001" // /* MW 3 */
+ 2934 "00100000" // /* MW 2 */
+ 2935 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2937 "01011101" // /* MW 3 */
+ 2938 "10100100" // /* MW 2 */
+ 2939 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40 first
+ 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2941 "01000111" // /* MW 3 */
+ 2942 "11110110" // /* MW 2 */
+ 2943 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 452 40
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2945 "00111001" // /* MW 5 */
+ 2946 "10110111" // /* MW 4 */
+ 2947 "01000000" // /* MW 3 */
+ 2948 "01001010" // /* MW 2 */
+ 2949 "11000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7
+ 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2951 "00100010" // /* MW 3 */
+ 2952 "01111011" // /* MW 2 */
+ 2953 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 30 first
+ 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2955 "01100111" // /* MW 3 */
+ 2956 "11001100" // /* MW 2 */
+ 2957 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+ 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2959 "00000100" // /* MW 3 */
+ 2960 "10110111" // /* MW 2 */
+ 2961 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+ 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2963 "01000001" // /* MW 5 */
+ 2964 "10111011" // /* MW 4 */
+ 2965 "10111100" // /* MW 3 */
+ 2966 "11101011" // /* MW 2 */
+ 2967 "01111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+ 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 2969 "00000100" // /* MW 5 */
+ 2970 "10011011" // /* MW 4 */
+ 2971 "10110011" // /* MW 3 */
+ 2972 "10111110" // /* MW 2 */
+ 2973 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+ 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 2975 "00000001" // /* MW 5 */
+ 2976 "01000000" // /* MW 4 */
+ 2977 "11111000" // /* MW 3 */
+ 2978 "00000101" // /* MW 2 */
+ 2979 "11001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 458 36 first
+.delay_slot
+ 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2981 "01000111" // /* MW 3 */
+ 2982 "10110110" // /* MW 2 */
+ 2983 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 444 52 first
+.delay_slot
+ 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2985 "01000100" // /* MW 3 */
+ 2986 "01110001" // /* MW 2 */
+ 2987 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 462 7 first
+.delay_slot
+ 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2989 "01011101" // /* MW 3 */
+ 2990 "11111100" // /* MW 2 */
+ 2991 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11 first
+.delay_slot
+ 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2993 "01001101" // /* MW 3 */
+ 2994 "11101000" // /* MW 2 */
+ 2995 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 470 11
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.delay_slot
+ 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 2997 "00110010" // /* MW 3 */
+ 2998 "10001100" // /* MW 2 */
+ 2999 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+ 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */
+ 3001 "00000001" // /* MW 5 */
+ 3002 "01000000" // /* MW 4 */
+ 3003 "11111000" // /* MW 3 */
+ 3004 "00000101" // /* MW 2 */
+ 3005 "11011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3007 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3009 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3011 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3013 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3015 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */
+ 3017 "00100000" // /* MW 9 */
+ 3018 "00000000" // /* MW 8 */
+ 3019 "00000000" // /* MW 7 */
+ 3020 "10000100" // /* MW 6 */
+ 3021 "00000001" // /* MW 5 */
+ 3022 "00000000" // /* MW 4 */
+ 3023 "00000000" // /* MW 3 */
+ 3024 "00101111" // /* MW 2 */
+ 3025 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3027 "01011000" // /* MW 9 */
+ 3028 "00001100" // /* MW 8 */
+ 3029 "10001000" // /* MW 7 */
+ 3030 "10101011" // /* MW 6 */
+ 3031 "01010111" // /* MW 5 */
+ 3032 "00111110" // /* MW 4 */
+ 3033 "00000000" // /* MW 3 */
+ 3034 "00011010" // /* MW 2 */
+ 3035 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3037 "01000001" // /* MW 5 */
+ 3038 "00100000" // /* MW 4 */
+ 3039 "00100001" // /* MW 3 */
+ 3040 "01000010" // /* MW 2 */
+ 3041 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.delay_slot
+ 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3043 "00001101" // /* MW 3 */
+ 3044 "00011010" // /* MW 2 */
+ 3045 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.delay_slot
+ 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3047 "00111101" // /* MW 3 */
+ 3048 "00001110" // /* MW 2 */
+ 3049 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.delay_slot
+ 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3051 "11100010" // /* MW 5 */
+ 3052 "10010001" // /* MW 4 */
+ 3053 "11111111" // /* MW 3 */
+ 3054 "00101100" // /* MW 2 */
+ 3055 "00000000" // /* MW 1 */
+.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 453 40
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 504 45
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3057 "01011000" // /* MW 11 */
+ 3058 "11111100" // /* MW 10 */
+ 3059 "10001111" // /* MW 9 */
+ 3060 "10001000" // /* MW 8 */
+ 3061 "01010000" // /* MW 7 */
+ 3062 "00000001" // /* MW 6 */
+ 3063 "00001011" // /* MW 5 */
+ 3064 "10000010" // /* MW 4 */
+ 3065 "10000001" // /* MW 3 */
+ 3066 "00000010" // /* MW 2 */
+ 3067 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 453 40 first
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+.src_ref 2 "conv2d_bf16_params.h" 507 53
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3069 "01011000" // /* MW 9 */
+ 3070 "00001100" // /* MW 8 */
+ 3071 "10001000" // /* MW 7 */
+ 3072 "00001011" // /* MW 6 */
+ 3073 "10100000" // /* MW 5 */
+ 3074 "00000001" // /* MW 4 */
+ 3075 "11100000" // /* MW 3 */
+ 3076 "00011000" // /* MW 2 */
+ 3077 "00100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25
+.src_ref 2 "conv2d_bf16_params.h" 492 25
+.src_ref 2 "conv2d_bf16_params.h" 495 99
+.src_ref 2 "conv2d_bf16_params.h" 502 57
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 621 240
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3079 "01011000" // /* MW 9 */
+ 3080 "00000001" // /* MW 8 */
+ 3081 "11101000" // /* MW 7 */
+ 3082 "10101001" // /* MW 6 */
+ 3083 "01010111" // /* MW 5 */
+ 3084 "00111110" // /* MW 4 */
+ 3085 "00000000" // /* MW 3 */
+ 3086 "00000010" // /* MW 2 */
+ 3087 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 40
+.src_ref 2 "conv2d_bf16_params.h" 529 78
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 578 52
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+ 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 3089 "00000000" // /* MW 15 */
+ 3090 "00000000" // /* MW 14 */
+ 3091 "01011000" // /* MW 13 */
+ 3092 "00000011" // /* MW 12 */
+ 3093 "10101000" // /* MW 11 */
+ 3094 "11101001" // /* MW 10 */
+ 3095 "01110001" // /* MW 9 */
+ 3096 "00000000" // /* MW 8 */
+ 3097 "01011011" // /* MW 7 */
+ 3098 "00000001" // /* MW 6 */
+ 3099 "00100000" // /* MW 5 */
+ 3100 "00000000" // /* MW 4 */
+ 3101 "11110000" // /* MW 3 */
+ 3102 "00101100" // /* MW 2 */
+ 3103 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352
+.src_ref 2 "conv2d_bf16_params.h" 477 40 first
+.src_ref 2 "conv2d_bf16_params.h" 495 68 first
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+ 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3105 "01011000" // /* MW 9 */
+ 3106 "00111100" // /* MW 8 */
+ 3107 "00000000" // /* MW 7 */
+ 3108 "00111100" // /* MW 6 */
+ 3109 "10110011" // /* MW 5 */
+ 3110 "00011011" // /* MW 4 */
+ 3111 "01010000" // /* MW 3 */
+ 3112 "11000101" // /* MW 2 */
+ 3113 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18
+.src_ref 2 "conv2d_bf16_params.h" 481 24 first
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 495 112
+ 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3115 "01011000" // /* MW 9 */
+ 3116 "11001101" // /* MW 8 */
+ 3117 "10000111" // /* MW 7 */
+ 3118 "00010010" // /* MW 6 */
+ 3119 "00101101" // /* MW 5 */
+ 3120 "00000011" // /* MW 4 */
+ 3121 "01010000" // /* MW 3 */
+ 3122 "00000101" // /* MW 2 */
+ 3123 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 477 18 first
+.src_ref 2 "conv2d_bf16_params.h" 496 68
+.src_ref 2 "conv2d_bf16_params.h" 504 35
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 578 47
+ 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3125 "01011000" // /* MW 9 */
+ 3126 "00110111" // /* MW 8 */
+ 3127 "10000000" // /* MW 7 */
+ 3128 "10010001" // /* MW 6 */
+ 3129 "11011010" // /* MW 5 */
+ 3130 "00111011" // /* MW 4 */
+ 3131 "00000000" // /* MW 3 */
+ 3132 "01010111" // /* MW 2 */
+ 3133 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.src_ref 2 "conv2d_bf16_params.h" 504 45 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+.src_ref 2 "conv2d_bf16_params.h" 519 42
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+ 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3135 "01011000" // /* MW 9 */
+ 3136 "10111100" // /* MW 8 */
+ 3137 "00000111" // /* MW 7 */
+ 3138 "00111101" // /* MW 6 */
+ 3139 "10110000" // /* MW 5 */
+ 3140 "00101011" // /* MW 4 */
+ 3141 "00000000" // /* MW 3 */
+ 3142 "00000011" // /* MW 2 */
+ 3143 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 492 25 first
+.src_ref 2 "conv2d_bf16_params.h" 497 46
+.src_ref 2 "conv2d_bf16_params.h" 509 50
+ 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3145 "01011000" // /* MW 9 */
+ 3146 "01110000" // /* MW 8 */
+ 3147 "10000000" // /* MW 7 */
+ 3148 "01101100" // /* MW 6 */
+ 3149 "01101100" // /* MW 5 */
+ 3150 "00011111" // /* MW 4 */
+ 3151 "00000000" // /* MW 3 */
+ 3152 "00010000" // /* MW 2 */
+ 3153 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 520 34 first
+ 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3155 "01011101" // /* MW 5 */
+ 3156 "00011110" // /* MW 4 */
+ 3157 "00001000" // /* MW 3 */
+ 3158 "10010010" // /* MW 2 */
+ 3159 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77
+.src_ref 2 "conv2d_bf16_params.h" 520 48
+ 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3161 "01011001" // /* MW 9 */
+ 3162 "00110001" // /* MW 8 */
+ 3163 "10000000" // /* MW 7 */
+ 3164 "01101111" // /* MW 6 */
+ 3165 "01100001" // /* MW 5 */
+ 3166 "00101101" // /* MW 4 */
+ 3167 "10110000" // /* MW 3 */
+ 3168 "01011010" // /* MW 2 */
+ 3169 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+.src_ref 2 "conv2d_bf16_params.h" 507 42 first
+ 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3171 "00000101" // /* MW 5 */
+ 3172 "00011111" // /* MW 4 */
+ 3173 "00111100" // /* MW 3 */
+ 3174 "10111010" // /* MW 2 */
+ 3175 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 99 first
+ 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3177 "00010001" // /* MW 3 */
+ 3178 "11000010" // /* MW 2 */
+ 3179 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 495 96
+.src_ref 2 "conv2d_bf16_params.h" 610 64
+.src_ref 2 "conv2d_bf16_params.h" 709 96
+ 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3181 "00011101" // /* MW 5 */
+ 3182 "10100000" // /* MW 4 */
+ 3183 "11110000" // /* MW 3 */
+ 3184 "11000011" // /* MW 2 */
+ 3185 "10001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+ 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3187 "00100001" // /* MW 3 */
+ 3188 "10100011" // /* MW 2 */
+ 3189 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 96 first
+ 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3191 "00011101" // /* MW 3 */
+ 3192 "11111110" // /* MW 2 */
+ 3193 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 495 53
+.src_ref 2 "conv2d_bf16_params.h" 506 48
+.src_ref 2 "conv2d_bf16_params.h" 519 42 first
+ 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3195 "01011001" // /* MW 9 */
+ 3196 "01010111" // /* MW 8 */
+ 3197 "10000000" // /* MW 7 */
+ 3198 "11101110" // /* MW 6 */
+ 3199 "11110001" // /* MW 5 */
+ 3200 "00111011" // /* MW 4 */
+ 3201 "00110000" // /* MW 3 */
+ 3202 "01111110" // /* MW 2 */
+ 3203 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 68 first
+.src_ref 2 "conv2d_bf16_params.h" 504 35 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68
+ 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3205 "01011000" // /* MW 9 */
+ 3206 "10110010" // /* MW 8 */
+ 3207 "10000111" // /* MW 7 */
+ 3208 "00111101" // /* MW 6 */
+ 3209 "00110000" // /* MW 5 */
+ 3210 "00101111" // /* MW 4 */
+ 3211 "01010000" // /* MW 3 */
+ 3212 "01010101" // /* MW 2 */
+ 3213 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3215 "01111011" // /* MW 5 */
+ 3216 "11001100" // /* MW 4 */
+ 3217 "10111001" // /* MW 3 */
+ 3218 "01001110" // /* MW 2 */
+ 3219 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 496 53 first
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3221 "01011000" // /* MW 9 */
+ 3222 "11110110" // /* MW 8 */
+ 3223 "00000000" // /* MW 7 */
+ 3224 "00101101" // /* MW 6 */
+ 3225 "01101011" // /* MW 5 */
+ 3226 "00111111" // /* MW 4 */
+ 3227 "11100000" // /* MW 3 */
+ 3228 "01010100" // /* MW 2 */
+ 3229 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 46 first
+.src_ref 2 "conv2d_bf16_params.h" 509 50 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3231 "01011000" // /* MW 9 */
+ 3232 "01010000" // /* MW 8 */
+ 3233 "10000111" // /* MW 7 */
+ 3234 "00010000" // /* MW 6 */
+ 3235 "00111000" // /* MW 5 */
+ 3236 "00100111" // /* MW 4 */
+ 3237 "01010000" // /* MW 3 */
+ 3238 "01000011" // /* MW 2 */
+ 3239 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3241 "01100111" // /* MW 3 */
+ 3242 "11111110" // /* MW 2 */
+ 3243 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3245 "01100111" // /* MW 3 */
+ 3246 "11100000" // /* MW 2 */
+ 3247 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 499 51
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3249 "00000101" // /* MW 3 */
+ 3250 "11110111" // /* MW 2 */
+ 3251 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 78 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3253 "01010100" // /* MW 3 */
+ 3254 "11101011" // /* MW 2 */
+ 3255 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 507 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3257 "01100001" // /* MW 5 */
+ 3258 "10100000" // /* MW 4 */
+ 3259 "11011000" // /* MW 3 */
+ 3260 "10100011" // /* MW 2 */
+ 3261 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 491 25 first
+.src_ref 2 "conv2d_bf16_params.h" 507 34
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3263 "01001001" // /* MW 9 */
+ 3264 "10000000" // /* MW 8 */
+ 3265 "11001111" // /* MW 7 */
+ 3266 "01101111" // /* MW 6 */
+ 3267 "00101001" // /* MW 5 */
+ 3268 "00011111" // /* MW 4 */
+ 3269 "10110000" // /* MW 3 */
+ 3270 "01000010" // /* MW 2 */
+ 3271 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53 first
+.src_ref 2 "conv2d_bf16_params.h" 511 47 first
+ 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3273 "00111011" // /* MW 5 */
+ 3274 "01000110" // /* MW 4 */
+ 3275 "00111111" // /* MW 3 */
+ 3276 "11101010" // /* MW 2 */
+ 3277 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 500 53
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3279 "01010000" // /* MW 7 */
+ 3280 "10101000" // /* MW 6 */
+ 3281 "00000000" // /* MW 5 */
+ 3282 "00000010" // /* MW 4 */
+ 3283 "00110000" // /* MW 3 */
+ 3284 "01101010" // /* MW 2 */
+ 3285 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 77 first
+.src_ref 2 "conv2d_bf16_params.h" 509 19 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3287 "01111000" // /* MW 11 */
+ 3288 "11001110" // /* MW 10 */
+ 3289 "00001101" // /* MW 9 */
+ 3290 "00101100" // /* MW 8 */
+ 3291 "10110000" // /* MW 7 */
+ 3292 "10100111" // /* MW 6 */
+ 3293 "11110101" // /* MW 5 */
+ 3294 "11100111" // /* MW 4 */
+ 3295 "01010111" // /* MW 3 */
+ 3296 "01001001" // /* MW 2 */
+ 3297 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 19 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3299 "00010101" // /* MW 3 */
+ 3300 "11100011" // /* MW 2 */
+ 3301 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3303 "10000001" // /* MW 3 */
+ 3304 "10110111" // /* MW 2 */
+ 3305 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 47 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3307 "10010000" // /* MW 3 */
+ 3308 "10110000" // /* MW 2 */
+ 3309 "00010100" // /* MW 1 */
+ 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3311 "00000000" // /* MW 1 */
+ 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3313 "00000000" // /* MW 1 */
+ 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3315 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 57 first
+ 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3317 "00100001" // /* MW 3 */
+ 3318 "11100101" // /* MW 2 */
+ 3319 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 502 53
+ 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3321 "01010001" // /* MW 3 */
+ 3322 "11001010" // /* MW 2 */
+ 3323 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 48 first
+ 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3325 "01001010" // /* MW 3 */
+ 3326 "10101010" // /* MW 2 */
+ 3327 "00000010" // /* MW 1 */
+ 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3329 "00000000" // /* MW 1 */
+ 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3331 "00000000" // /* MW 1 */
+ 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3333 "00000000" // /* MW 1 */
+ 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3335 "00000000" // /* MW 1 */
+ 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3337 "00000000" // /* MW 1 */
+ 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3339 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 62
+ 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3341 "11100001" // /* MW 3 */
+ 3342 "10100100" // /* MW 2 */
+ 3343 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 506 73
+ 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3345 "10111110" // /* MW 3 */
+ 3346 "10100101" // /* MW 2 */
+ 3347 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45 first
+ 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3349 "00101101" // /* MW 3 */
+ 3350 "10100100" // /* MW 2 */
+ 3351 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3353 "00000000" // /* MW 5 */
+ 3354 "10100000" // /* MW 4 */
+ 3355 "00001101" // /* MW 3 */
+ 3356 "00000001" // /* MW 2 */
+ 3357 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3359 "00100000" // /* MW 3 */
+ 3360 "11100101" // /* MW 2 */
+ 3361 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+ 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3363 "00000000" // /* MW 5 */
+ 3364 "10100000" // /* MW 4 */
+ 3365 "00001101" // /* MW 3 */
+ 3366 "11111111" // /* MW 2 */
+ 3367 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 510 45
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.src_ref 2 "conv2d_bf16_params.h" 642 99
+ 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3369 "11000001" // /* MW 5 */
+ 3370 "00111111" // /* MW 4 */
+ 3371 "10011001" // /* MW 3 */
+ 3372 "11100100" // /* MW 2 */
+ 3373 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 19 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3375 "11100001" // /* MW 5 */
+ 3376 "10111111" // /* MW 4 */
+ 3377 "10111000" // /* MW 3 */
+ 3378 "11100010" // /* MW 2 */
+ 3379 "11011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 512 64 first
+.src_ref 2 "conv2d_bf16_params.h" 524 122 first
+ 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3381 "00111011" // /* MW 5 */
+ 3382 "11001110" // /* MW 4 */
+ 3383 "00111001" // /* MW 3 */
+ 3384 "11101110" // /* MW 2 */
+ 3385 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3387 "00110001" // /* MW 3 */
+ 3388 "10110101" // /* MW 2 */
+ 3389 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 122
+ 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3391 "10101101" // /* MW 3 */
+ 3392 "00101001" // /* MW 2 */
+ 3393 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 520 19 first
+ 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3395 "01100101" // /* MW 3 */
+ 3396 "10110101" // /* MW 2 */
+ 3397 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 511 36 first
+.src_ref 2 "conv2d_bf16_params.h" 522 68 first
+ 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3399 "00100000" // /* MW 5 */
+ 3400 "01101001" // /* MW 4 */
+ 3401 "00111111" // /* MW 3 */
+ 3402 "01101010" // /* MW 2 */
+ 3403 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 524 65 first
+.src_ref 2 "conv2d_bf16_params.h" 529 62 first
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3405 "10101000" // /* MW 9 */
+ 3406 "10101000" // /* MW 8 */
+ 3407 "11001110" // /* MW 7 */
+ 3408 "01101111" // /* MW 6 */
+ 3409 "01001001" // /* MW 5 */
+ 3410 "00110111" // /* MW 4 */
+ 3411 "01010000" // /* MW 3 */
+ 3412 "01100101" // /* MW 2 */
+ 3413 "01001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3415 "11111001" // /* MW 5 */
+ 3416 "10100011" // /* MW 4 */
+ 3417 "10111000" // /* MW 3 */
+ 3418 "10100011" // /* MW 2 */
+ 3419 "10110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 527 45 first
+.src_ref 2 "conv2d_bf16_params.h" 533 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3421 "00011111" // /* MW 5 */
+ 3422 "01101011" // /* MW 4 */
+ 3423 "11101101" // /* MW 3 */
+ 3424 "01100100" // /* MW 2 */
+ 3425 "01000101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3427 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3429 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3431 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3433 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 48 first
+.src_ref 2 "conv2d_bf16_params.h" 533 46
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3435 "11001010" // /* MW 5 */
+ 3436 "10110101" // /* MW 4 */
+ 3437 "10111101" // /* MW 3 */
+ 3438 "01011111" // /* MW 2 */
+ 3439 "10000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14 first
+ 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3441 "00000001" // /* MW 5 */
+ 3442 "01000000" // /* MW 4 */
+ 3443 "11111000" // /* MW 3 */
+ 3444 "00000110" // /* MW 2 */
+ 3445 "11111000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 76 first
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3447 "11110010" // /* MW 5 */
+ 3448 "10111011" // /* MW 4 */
+ 3449 "11101101" // /* MW 3 */
+ 3450 "01000001" // /* MW 2 */
+ 3451 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 529 122
+.delay_slot
+ 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3453 "01011101" // /* MW 3 */
+ 3454 "11101011" // /* MW 2 */
+ 3455 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 93 first
+.delay_slot
+ 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3457 "00010100" // /* MW 3 */
+ 3458 "01100011" // /* MW 2 */
+ 3459 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 534 44
+.src_ref 2 "conv2d_bf16_params.h" 539 139 first
+.src_ref 2 "conv2d_bf16_params.h" 555 59
+.src_ref 2 "conv2d_bf16_params.h" 559 59
+.src_ref 2 "conv2d_bf16_params.h" 700 17
+.delay_slot
+ 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3461 "01011001" // /* MW 9 */
+ 3462 "00000001" // /* MW 8 */
+ 3463 "00101000" // /* MW 7 */
+ 3464 "00111110" // /* MW 6 */
+ 3465 "10111110" // /* MW 5 */
+ 3466 "00001101" // /* MW 4 */
+ 3467 "00110000" // /* MW 3 */
+ 3468 "01000110" // /* MW 2 */
+ 3469 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.delay_slot
+ 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3471 "10011100" // /* MW 3 */
+ 3472 "10011011" // /* MW 2 */
+ 3473 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3475 "10010001" // /* MW 3 */
+ 3476 "11100011" // /* MW 2 */
+ 3477 "00000111" // /* MW 1 */
+ 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3479 "00000000" // /* MW 1 */
+ 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3481 "00000000" // /* MW 1 */
+ 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3483 "00000000" // /* MW 1 */
+ 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3485 "00000000" // /* MW 1 */
+ 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3487 "00000000" // /* MW 1 */
+ 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 14
+ 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */
+ 3491 "00000001" // /* MW 5 */
+ 3492 "01000000" // /* MW 4 */
+ 3493 "11111000" // /* MW 3 */
+ 3494 "00000110" // /* MW 2 */
+ 3495 "11100000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3501 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3503 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 3505 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3507 "01011000" // /* MW 9 */
+ 3508 "01000000" // /* MW 8 */
+ 3509 "00101000" // /* MW 7 */
+ 3510 "10001011" // /* MW 6 */
+ 3511 "00010000" // /* MW 5 */
+ 3512 "00000001" // /* MW 4 */
+ 3513 "00000000" // /* MW 3 */
+ 3514 "10111100" // /* MW 2 */
+ 3515 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3517 "11010010" // /* MW 3 */
+ 3518 "01111110" // /* MW 2 */
+ 3519 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 162
+ 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3521 "01100111" // /* MW 3 */
+ 3522 "01110110" // /* MW 2 */
+ 3523 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+.src_ref 2 "conv2d_bf16_params.h" 539 139
+ 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3525 "00000001" // /* MW 5 */
+ 3526 "10100000" // /* MW 4 */
+ 3527 "01001111" // /* MW 3 */
+ 3528 "00111000" // /* MW 2 */
+ 3529 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 46
+ 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3531 "01010000" // /* MW 3 */
+ 3532 "00110010" // /* MW 2 */
+ 3533 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 44
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3535 "11101111" // /* MW 3 */
+ 3536 "01111101" // /* MW 2 */
+ 3537 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 115
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3539 "00111001" // /* MW 5 */
+ 3540 "11000100" // /* MW 4 */
+ 3541 "01011101" // /* MW 3 */
+ 3542 "11100011" // /* MW 2 */
+ 3543 "11001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 82
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3545 "10000010" // /* MW 3 */
+ 3546 "11100011" // /* MW 2 */
+ 3547 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 79
+ 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3549 "11101111" // /* MW 3 */
+ 3550 "01100011" // /* MW 2 */
+ 3551 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3553 "11000001" // /* MW 3 */
+ 3554 "11111001" // /* MW 2 */
+ 3555 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 539 135
+ 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3557 "11001110" // /* MW 3 */
+ 3558 "01100011" // /* MW 2 */
+ 3559 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 55 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 3561 "00011100" // /* MW 7 */
+ 3562 "00000000" // /* MW 6 */
+ 3563 "00000000" // /* MW 5 */
+ 3564 "10000001" // /* MW 4 */
+ 3565 "00010100" // /* MW 3 */
+ 3566 "00100011" // /* MW 2 */
+ 3567 "00000000" // /* MW 1 */
+.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.src_ref 2 "conv2d_bf16_params.h" 669 63
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3569 "01111000" // /* MW 9 */
+ 3570 "00001110" // /* MW 8 */
+ 3571 "01110000" // /* MW 7 */
+ 3572 "11101011" // /* MW 6 */
+ 3573 "11000111" // /* MW 5 */
+ 3574 "00111111" // /* MW 4 */
+ 3575 "00000000" // /* MW 3 */
+ 3576 "00011001" // /* MW 2 */
+ 3577 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 63 first
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3579 "11000010" // /* MW 3 */
+ 3580 "01111111" // /* MW 2 */
+ 3581 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 497 34 first
+.src_ref 2 "conv2d_bf16_params.h" 641 32 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3583 "10101000" // /* MW 9 */
+ 3584 "01110100" // /* MW 8 */
+ 3585 "01001111" // /* MW 7 */
+ 3586 "10000011" // /* MW 6 */
+ 3587 "00000100" // /* MW 5 */
+ 3588 "00100001" // /* MW 4 */
+ 3589 "00100000" // /* MW 3 */
+ 3590 "01101110" // /* MW 2 */
+ 3591 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 61 first
+.src_ref 2 "conv2d_bf16_params.h" 640 16
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.aggressive_scheduled_block_id 5
+.nohwbrkpt
+.noswbrkpt
+ 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3593 "01011000" // /* MW 9 */
+ 3594 "00001001" // /* MW 8 */
+ 3595 "10101000" // /* MW 7 */
+ 3596 "10000011" // /* MW 6 */
+ 3597 "01000100" // /* MW 5 */
+ 3598 "00101001" // /* MW 4 */
+ 3599 "00000000" // /* MW 3 */
+ 3600 "00011110" // /* MW 2 */
+ 3601 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3603 "11100010" // /* MW 3 */
+ 3604 "01110011" // /* MW 2 */
+ 3605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 47 first
+ 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3607 "10001000" // /* MW 3 */
+ 3608 "11111001" // /* MW 2 */
+ 3609 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 640 16 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3611 "00111101" // /* MW 3 */
+ 3612 "01111011" // /* MW 2 */
+ 3613 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3615 "00010000" // /* MW 9 */
+ 3616 "00000100" // /* MW 8 */
+ 3617 "00001010" // /* MW 7 */
+ 3618 "00000011" // /* MW 6 */
+ 3619 "00000000" // /* MW 5 */
+ 3620 "00000000" // /* MW 4 */
+ 3621 "00100000" // /* MW 3 */
+ 3622 "11011110" // /* MW 2 */
+ 3623 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 641 44 first
+.src_ref 2 "conv2d_bf16_params.h" 642 45 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3625 "11111111" // /* MW 5 */
+ 3626 "00111010" // /* MW 4 */
+ 3627 "10111111" // /* MW 3 */
+ 3628 "11100111" // /* MW 2 */
+ 3629 "11001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 20
+.src_ref 2 "conv2d_bf16_params.h" 642 87
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3631 "11100110" // /* MW 3 */
+ 3632 "11001111" // /* MW 2 */
+ 3633 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 55 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3635 "00101001" // /* MW 5 */
+ 3636 "10101000" // /* MW 4 */
+ 3637 "00001011" // /* MW 3 */
+ 3638 "11010010" // /* MW 2 */
+ 3639 "10110100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3641 "00000001" // /* MW 5 */
+ 3642 "00100001" // /* MW 4 */
+ 3643 "01001101" // /* MW 3 */
+ 3644 "10110000" // /* MW 2 */
+ 3645 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 38 first
+.src_ref 2 "conv2d_bf16_params.h" 557 34
+ 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3647 "00111001" // /* MW 5 */
+ 3648 "11000010" // /* MW 4 */
+ 3649 "00011101" // /* MW 3 */
+ 3650 "10110101" // /* MW 2 */
+ 3651 "00110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 99 first
+ 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3653 "00100100" // /* MW 3 */
+ 3654 "11001111" // /* MW 2 */
+ 3655 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 557 34 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+ 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3657 "01000001" // /* MW 5 */
+ 3658 "10100110" // /* MW 4 */
+ 3659 "01001101" // /* MW 3 */
+ 3660 "11011110" // /* MW 2 */
+ 3661 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3663 "01111101" // /* MW 5 */
+ 3664 "00100000" // /* MW 4 */
+ 3665 "01001001" // /* MW 3 */
+ 3666 "00001000" // /* MW 2 */
+ 3667 "00101001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 119 first
+ 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3669 "00100100" // /* MW 3 */
+ 3670 "11101111" // /* MW 2 */
+ 3671 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 540 15 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3673 "01011000" // /* MW 9 */
+ 3674 "01110000" // /* MW 8 */
+ 3675 "01001111" // /* MW 7 */
+ 3676 "01101110" // /* MW 6 */
+ 3677 "01000010" // /* MW 5 */
+ 3678 "00100000" // /* MW 4 */
+ 3679 "00000000" // /* MW 3 */
+ 3680 "00011110" // /* MW 2 */
+ 3681 "11011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3683 "00100010" // /* MW 3 */
+ 3684 "10111101" // /* MW 2 */
+ 3685 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 85 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+ 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3687 "01011000" // /* MW 9 */
+ 3688 "00100000" // /* MW 8 */
+ 3689 "00001001" // /* MW 7 */
+ 3690 "11111110" // /* MW 6 */
+ 3691 "10101001" // /* MW 5 */
+ 3692 "00101111" // /* MW 4 */
+ 3693 "00000000" // /* MW 3 */
+ 3694 "00000101" // /* MW 2 */
+ 3695 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+ 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3697 "01010010" // /* MW 3 */
+ 3698 "00100000" // /* MW 2 */
+ 3699 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 559 59 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3701 "11110010" // /* MW 5 */
+ 3702 "10111101" // /* MW 4 */
+ 3703 "11111101" // /* MW 3 */
+ 3704 "00001001" // /* MW 2 */
+ 3705 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 669 41 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3707 "00111001" // /* MW 5 */
+ 3708 "11000100" // /* MW 4 */
+ 3709 "10111101" // /* MW 3 */
+ 3710 "00111111" // /* MW 2 */
+ 3711 "10000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 117 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3713 "01011111" // /* MW 5 */
+ 3714 "01101011" // /* MW 4 */
+ 3715 "10110111" // /* MW 3 */
+ 3716 "11101110" // /* MW 2 */
+ 3717 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+ 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3719 "00110010" // /* MW 3 */
+ 3720 "10000100" // /* MW 2 */
+ 3721 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 52 first
+ 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3723 "00001100" // /* MW 3 */
+ 3724 "01111110" // /* MW 2 */
+ 3725 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 92 first
+ 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3727 "10001111" // /* MW 3 */
+ 3728 "00110001" // /* MW 2 */
+ 3729 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 578 36 first
+ 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3731 "11000101" // /* MW 3 */
+ 3732 "11110111" // /* MW 2 */
+ 3733 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 64 first
+.src_ref 2 "conv2d_bf16_params.h" 611 47
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 629 82
+ 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3735 "01011000" // /* MW 11 */
+ 3736 "00000000" // /* MW 10 */
+ 3737 "10001001" // /* MW 9 */
+ 3738 "11101110" // /* MW 8 */
+ 3739 "11000000" // /* MW 7 */
+ 3740 "10110111" // /* MW 6 */
+ 3741 "10010101" // /* MW 5 */
+ 3742 "11101110" // /* MW 4 */
+ 3743 "00000111" // /* MW 3 */
+ 3744 "00000011" // /* MW 2 */
+ 3745 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+ 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3747 "00111001" // /* MW 5 */
+ 3748 "10110111" // /* MW 4 */
+ 3749 "01000000" // /* MW 3 */
+ 3750 "00101000" // /* MW 2 */
+ 3751 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3753 "00001100" // /* MW 5 */
+ 3754 "10101100" // /* MW 4 */
+ 3755 "00001111" // /* MW 3 */
+ 3756 "00000000" // /* MW 2 */
+ 3757 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 60 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+ 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3759 "11001001" // /* MW 9 */
+ 3760 "00111111" // /* MW 8 */
+ 3761 "10001001" // /* MW 7 */
+ 3762 "00111100" // /* MW 6 */
+ 3763 "10110000" // /* MW 5 */
+ 3764 "00011111" // /* MW 4 */
+ 3765 "10110000" // /* MW 3 */
+ 3766 "00010010" // /* MW 2 */
+ 3767 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 554 53
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 555 59 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3769 "11001000" // /* MW 11 */
+ 3770 "01111111" // /* MW 10 */
+ 3771 "11001100" // /* MW 9 */
+ 3772 "10010010" // /* MW 8 */
+ 3773 "11111111" // /* MW 7 */
+ 3774 "10101101" // /* MW 6 */
+ 3775 "10010001" // /* MW 5 */
+ 3776 "00011100" // /* MW 4 */
+ 3777 "10000010" // /* MW 3 */
+ 3778 "10001100" // /* MW 2 */
+ 3779 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 555 53
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 621 240 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3781 "01111001" // /* MW 9 */
+ 3782 "10001110" // /* MW 8 */
+ 3783 "01110000" // /* MW 7 */
+ 3784 "11101111" // /* MW 6 */
+ 3785 "01010111" // /* MW 5 */
+ 3786 "00101011" // /* MW 4 */
+ 3787 "00110000" // /* MW 3 */
+ 3788 "01011010" // /* MW 2 */
+ 3789 "01001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 558 53 first
+.src_ref 2 "conv2d_bf16_params.h" 559 53
+.src_ref 2 "conv2d_bf16_params.h" 621 140
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 645 41
+ 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3791 "01011000" // /* MW 11 */
+ 3792 "01011000" // /* MW 10 */
+ 3793 "00000000" // /* MW 9 */
+ 3794 "00001110" // /* MW 8 */
+ 3795 "01001110" // /* MW 7 */
+ 3796 "10101001" // /* MW 6 */
+ 3797 "01010001" // /* MW 5 */
+ 3798 "00011111" // /* MW 4 */
+ 3799 "00000010" // /* MW 3 */
+ 3800 "11011001" // /* MW 2 */
+ 3801 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 559 53 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 645 41 first
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id first
+ 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3803 "00100100" // /* MW 5 */
+ 3804 "11100011" // /* MW 4 */
+ 3805 "00111111" // /* MW 3 */
+ 3806 "01100010" // /* MW 2 */
+ 3807 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 610 47 first
+.src_ref 2 "conv2d_bf16_params.h" 621 222
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 8
+.noswbrkpt
+ 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 3809 "01111000" // /* MW 11 */
+ 3810 "10010000" // /* MW 10 */
+ 3811 "01101001" // /* MW 9 */
+ 3812 "00001111" // /* MW 8 */
+ 3813 "11001110" // /* MW 7 */
+ 3814 "10101011" // /* MW 6 */
+ 3815 "10010001" // /* MW 5 */
+ 3816 "11101111" // /* MW 4 */
+ 3817 "00100010" // /* MW 3 */
+ 3818 "01101110" // /* MW 2 */
+ 3819 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 661 61
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3821 "11001000" // /* MW 9 */
+ 3822 "11111111" // /* MW 8 */
+ 3823 "10001100" // /* MW 7 */
+ 3824 "00010010" // /* MW 6 */
+ 3825 "11001110" // /* MW 5 */
+ 3826 "00101001" // /* MW 4 */
+ 3827 "00000000" // /* MW 3 */
+ 3828 "11110011" // /* MW 2 */
+ 3829 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 621 156
+.src_ref 2 "conv2d_bf16_params.h" 649 41
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 710 60
+.src_ref 2 "conv2d_bf16_params.h" 710 65
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3831 "01111000" // /* MW 9 */
+ 3832 "10001110" // /* MW 8 */
+ 3833 "01110000" // /* MW 7 */
+ 3834 "01110011" // /* MW 6 */
+ 3835 "11101010" // /* MW 5 */
+ 3836 "00111011" // /* MW 4 */
+ 3837 "00000000" // /* MW 3 */
+ 3838 "00011101" // /* MW 2 */
+ 3839 "11110111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3841 "01000100" // /* MW 5 */
+ 3842 "11001010" // /* MW 4 */
+ 3843 "00101110" // /* MW 3 */
+ 3844 "11101110" // /* MW 2 */
+ 3845 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 156 first
+.src_ref 2 "conv2d_bf16_params.h" 649 41 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.src_ref 2 "conv2d_bf16_params.h" 700 34
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3847 "01111000" // /* MW 9 */
+ 3848 "10010000" // /* MW 8 */
+ 3849 "01101001" // /* MW 7 */
+ 3850 "10010011" // /* MW 6 */
+ 3851 "00111001" // /* MW 5 */
+ 3852 "00111111" // /* MW 4 */
+ 3853 "00000000" // /* MW 3 */
+ 3854 "00011111" // /* MW 2 */
+ 3855 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 700 34 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3857 "00100010" // /* MW 3 */
+ 3858 "11000100" // /* MW 2 */
+ 3859 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 82 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3861 "01010001" // /* MW 3 */
+ 3862 "11101011" // /* MW 2 */
+ 3863 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 611 47 first
+.aggressive_scheduled_block_id 8
+.nohwbrkpt
+.noswbrkpt
+ 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3865 "01011001" // /* MW 9 */
+ 3866 "11000000" // /* MW 8 */
+ 3867 "01101111" // /* MW 7 */
+ 3868 "10010000" // /* MW 6 */
+ 3869 "00100111" // /* MW 5 */
+ 3870 "00000100" // /* MW 4 */
+ 3871 "00110000" // /* MW 3 */
+ 3872 "10001110" // /* MW 2 */
+ 3873 "01000111" // /* MW 1 */
+.aggressive_scheduled_block_id 8
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3875 "00110010" // /* MW 3 */
+ 3876 "00111000" // /* MW 2 */
+ 3877 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 643 22 first
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id first
+ 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 3879 "01111111" // /* MW 3 */
+ 3880 "11111110" // /* MW 2 */
+ 3881 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.noswbrkpt
+ 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3883 "01100100" // /* MW 5 */
+ 3884 "00001100" // /* MW 4 */
+ 3885 "00101110" // /* MW 3 */
+ 3886 "11000110" // /* MW 2 */
+ 3887 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 621 47 first
+.src_ref 2 "conv2d_bf16_params.h" 629 45
+.src_ref 2 "conv2d_bf16_params.h" 684 30 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3889 "01011001" // /* MW 9 */
+ 3890 "00101000" // /* MW 8 */
+ 3891 "10000000" // /* MW 7 */
+ 3892 "01111100" // /* MW 6 */
+ 3893 "00101001" // /* MW 5 */
+ 3894 "00110101" // /* MW 4 */
+ 3895 "00110000" // /* MW 3 */
+ 3896 "10001110" // /* MW 2 */
+ 3897 "01011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 629 45 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3899 "11100100" // /* MW 5 */
+ 3900 "00001101" // /* MW 4 */
+ 3901 "00110001" // /* MW 3 */
+ 3902 "01010110" // /* MW 2 */
+ 3903 "01000101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 644 22
+.src_ref 2 "conv2d_bf16_params.h" 700 17 first
+.src_ref 2 "conv2d_bf16_params.h" 705 50
+.src_ref 2 "conv2d_bf16_params.h" 705 61
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3905 "10101000" // /* MW 9 */
+ 3906 "11111100" // /* MW 8 */
+ 3907 "10101001" // /* MW 7 */
+ 3908 "11111110" // /* MW 6 */
+ 3909 "00111000" // /* MW 5 */
+ 3910 "00000110" // /* MW 4 */
+ 3911 "00100000" // /* MW 3 */
+ 3912 "00000010" // /* MW 2 */
+ 3913 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 645 38 first
+.src_ref 2 "conv2d_bf16_params.h" 700 111
+.src_ref 2 "conv2d_bf16_params.h" 700 149
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3915 "00000110" // /* MW 9 */
+ 3916 "00000110" // /* MW 8 */
+ 3917 "00000101" // /* MW 7 */
+ 3918 "10000000" // /* MW 6 */
+ 3919 "00010001" // /* MW 5 */
+ 3920 "00011111" // /* MW 4 */
+ 3921 "00100010" // /* MW 3 */
+ 3922 "11000110" // /* MW 2 */
+ 3923 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14
+.src_ref 2 "conv2d_bf16_params.h" 649 38 first
+.src_ref 2 "conv2d_bf16_params.h" 674 24
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3925 "00010001" // /* MW 9 */
+ 3926 "11111000" // /* MW 8 */
+ 3927 "01101111" // /* MW 7 */
+ 3928 "00111110" // /* MW 6 */
+ 3929 "00000000" // /* MW 5 */
+ 3930 "00000000" // /* MW 4 */
+ 3931 "00110000" // /* MW 3 */
+ 3932 "11001110" // /* MW 2 */
+ 3933 "01001111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 14 first
+.src_ref 2 "conv2d_bf16_params.h" 662 61
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.aggressive_scheduled_block_id 9
+.nohwbrkpt
+.noswbrkpt
+ 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3935 "11001001" // /* MW 9 */
+ 3936 "10111111" // /* MW 8 */
+ 3937 "01001011" // /* MW 7 */
+ 3938 "10100100" // /* MW 6 */
+ 3939 "01001001" // /* MW 5 */
+ 3940 "00111111" // /* MW 4 */
+ 3941 "00110000" // /* MW 3 */
+ 3942 "11010010" // /* MW 2 */
+ 3943 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 663 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 9
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3945 "10011100" // /* MW 5 */
+ 3946 "01010110" // /* MW 4 */
+ 3947 "00110001" // /* MW 3 */
+ 3948 "11000110" // /* MW 2 */
+ 3949 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+ 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3951 "10000001" // /* MW 5 */
+ 3952 "01111010" // /* MW 4 */
+ 3953 "00111111" // /* MW 3 */
+ 3954 "10001010" // /* MW 2 */
+ 3955 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id first
+ 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3957 "11100011" // /* MW 5 */
+ 3958 "01110011" // /* MW 4 */
+ 3959 "00111000" // /* MW 3 */
+ 3960 "11111010" // /* MW 2 */
+ 3961 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.aggressive_scheduled_block_id 10
+.noswbrkpt
+ 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3963 "01011001" // /* MW 9 */
+ 3964 "00000000" // /* MW 8 */
+ 3965 "01100000" // /* MW 7 */
+ 3966 "00110000" // /* MW 6 */
+ 3967 "11111000" // /* MW 5 */
+ 3968 "00101101" // /* MW 4 */
+ 3969 "00110000" // /* MW 3 */
+ 3970 "11010110" // /* MW 2 */
+ 3971 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 644 22 first
+.src_ref 2 "conv2d_bf16_params.h" 664 38 first
+.src_ref 2 "conv2d_bf16_params.h" 705 45
+.aggressive_scheduled_block_id 10
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3973 "11001001" // /* MW 9 */
+ 3974 "01111111" // /* MW 8 */
+ 3975 "00101100" // /* MW 7 */
+ 3976 "01111110" // /* MW 6 */
+ 3977 "00100000" // /* MW 5 */
+ 3978 "00111110" // /* MW 4 */
+ 3979 "00110000" // /* MW 3 */
+ 3980 "10001100" // /* MW 2 */
+ 3981 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 664 38
+.src_ref 2 "conv2d_bf16_params.h" 705 50 first
+.src_ref 2 "conv2d_bf16_params.h" 705 61 first
+ 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 3983 "00001100" // /* MW 5 */
+ 3984 "10111000" // /* MW 4 */
+ 3985 "00111000" // /* MW 3 */
+ 3986 "10001100" // /* MW 2 */
+ 3987 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10
+.src_ref 2 "conv2d_bf16_params.h" 674 24 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 682 38
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.src_ref 2 "conv2d_bf16_params.h" 720 50
+ 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3989 "01011001" // /* MW 9 */
+ 3990 "00000000" // /* MW 8 */
+ 3991 "01001000" // /* MW 7 */
+ 3992 "00100100" // /* MW 6 */
+ 3993 "00000001" // /* MW 5 */
+ 3994 "00100111" // /* MW 4 */
+ 3995 "00110000" // /* MW 3 */
+ 3996 "11011010" // /* MW 2 */
+ 3997 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 3999 "01111001" // /* MW 9 */
+ 4000 "00001110" // /* MW 8 */
+ 4001 "01110000" // /* MW 7 */
+ 4002 "10001111" // /* MW 6 */
+ 4003 "00011111" // /* MW 5 */
+ 4004 "00000101" // /* MW 4 */
+ 4005 "00110000" // /* MW 3 */
+ 4006 "11110010" // /* MW 2 */
+ 4007 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 707 61 first
+ 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4009 "11011111" // /* MW 5 */
+ 4010 "10111001" // /* MW 4 */
+ 4011 "00111011" // /* MW 3 */
+ 4012 "10010010" // /* MW 2 */
+ 4013 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25
+.src_ref 2 "conv2d_bf16_params.h" 674 22 first
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+ 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4015 "01011001" // /* MW 9 */
+ 4016 "00000110" // /* MW 8 */
+ 4017 "00001000" // /* MW 7 */
+ 4018 "10001100" // /* MW 6 */
+ 4019 "00001111" // /* MW 5 */
+ 4020 "00100001" // /* MW 4 */
+ 4021 "00110000" // /* MW 3 */
+ 4022 "11000110" // /* MW 2 */
+ 4023 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 642 25 first
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 655 23
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.src_ref 2 "conv2d_bf16_params.h" 679 23
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id first
+ 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4025 "01111000" // /* MW 11 */
+ 4026 "10010000" // /* MW 10 */
+ 4027 "01101001" // /* MW 9 */
+ 4028 "00010011" // /* MW 8 */
+ 4029 "00000000" // /* MW 7 */
+ 4030 "10011011" // /* MW 6 */
+ 4031 "00010001" // /* MW 5 */
+ 4032 "00011110" // /* MW 4 */
+ 4033 "00000010" // /* MW 3 */
+ 4034 "00000000" // /* MW 2 */
+ 4035 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 655 23 first
+.src_ref 2 "conv2d_bf16_params.h" 679 23 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12
+.aggressive_scheduled_block_id 11
+.noswbrkpt
+ 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4037 "10100100" // /* MW 5 */
+ 4038 "00010100" // /* MW 4 */
+ 4039 "00100000" // /* MW 3 */
+ 4040 "00010110" // /* MW 2 */
+ 4041 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 691 56 first
+.aggressive_scheduled_block_id 11
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4043 "10101111" // /* MW 3 */
+ 4044 "01100011" // /* MW 2 */
+ 4045 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 675 38 first
+.src_ref 2 "conv2d_bf16_params.h" 709 71 first
+ 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4047 "01011001" // /* MW 9 */
+ 4048 "11001000" // /* MW 8 */
+ 4049 "00000111" // /* MW 7 */
+ 4050 "01101101" // /* MW 6 */
+ 4051 "00001000" // /* MW 5 */
+ 4052 "00000111" // /* MW 4 */
+ 4053 "00110000" // /* MW 3 */
+ 4054 "10001100" // /* MW 2 */
+ 4055 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 675 38
+.src_ref 2 "conv2d_bf16_params.h" 706 23 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 709 76
+ 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4057 "11001000" // /* MW 11 */
+ 4058 "11000001" // /* MW 10 */
+ 4059 "10101000" // /* MW 9 */
+ 4060 "11101101" // /* MW 8 */
+ 4061 "11110111" // /* MW 7 */
+ 4062 "10100000" // /* MW 6 */
+ 4063 "01100001" // /* MW 5 */
+ 4064 "01001000" // /* MW 4 */
+ 4065 "00000010" // /* MW 3 */
+ 4066 "01100011" // /* MW 2 */
+ 4067 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 682 38 first
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4069 "01111011" // /* MW 5 */
+ 4070 "11000000" // /* MW 4 */
+ 4071 "00110110" // /* MW 3 */
+ 4072 "00001010" // /* MW 2 */
+ 4073 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 126 21 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+ 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4075 "01000001" // /* MW 5 */
+ 4076 "10001110" // /* MW 4 */
+ 4077 "00111000" // /* MW 3 */
+ 4078 "11011010" // /* MW 2 */
+ 4079 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 127 10 first
+.src_ref 2 "conv2d_bf16_params.h" 127 19 first
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+ 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4081 "10011100" // /* MW 5 */
+ 4082 "11001000" // /* MW 4 */
+ 4083 "00111000" // /* MW 3 */
+ 4084 "11001010" // /* MW 2 */
+ 4085 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4087 "11011011" // /* MW 5 */
+ 4088 "10010100" // /* MW 4 */
+ 4089 "00110010" // /* MW 3 */
+ 4090 "10010010" // /* MW 2 */
+ 4091 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+.src_ref 2 "conv2d_bf16_params.h" 706 28 first
+ 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4093 "01011001" // /* MW 9 */
+ 4094 "11111101" // /* MW 8 */
+ 4095 "00001111" // /* MW 7 */
+ 4096 "00000100" // /* MW 6 */
+ 4097 "00111000" // /* MW 5 */
+ 4098 "00011010" // /* MW 4 */
+ 4099 "00110000" // /* MW 3 */
+ 4100 "10001110" // /* MW 2 */
+ 4101 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 706 28
+ 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4103 "00001110" // /* MW 3 */
+ 4104 "11000000" // /* MW 2 */
+ 4105 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 707 66 first
+ 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4107 "00011111" // /* MW 5 */
+ 4108 "00010000" // /* MW 4 */
+ 4109 "00110111" // /* MW 3 */
+ 4110 "11001010" // /* MW 2 */
+ 4111 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37
+.src_ref 2 "conv2d_bf16_params.h" 709 96 first
+ 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4113 "00111011" // /* MW 5 */
+ 4114 "00001100" // /* MW 4 */
+ 4115 "00110000" // /* MW 3 */
+ 4116 "10001100" // /* MW 2 */
+ 4117 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 696 37 first
+.src_ref 2 "conv2d_bf16_params.h" 709 90
+ 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4119 "00110001" // /* MW 9 */
+ 4120 "11000110" // /* MW 8 */
+ 4121 "00000011" // /* MW 7 */
+ 4122 "10000000" // /* MW 6 */
+ 4123 "01100001" // /* MW 5 */
+ 4124 "00011100" // /* MW 4 */
+ 4125 "00100010" // /* MW 3 */
+ 4126 "10110110" // /* MW 2 */
+ 4127 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 707 50 first
+.src_ref 2 "conv2d_bf16_params.h" 708 59
+.src_ref 2 "conv2d_bf16_params.h" 710 60 first
+.src_ref 2 "conv2d_bf16_params.h" 710 65 first
+ 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4129 "11001000" // /* MW 11 */
+ 4130 "00111111" // /* MW 10 */
+ 4131 "00101000" // /* MW 9 */
+ 4132 "00110000" // /* MW 8 */
+ 4133 "01110000" // /* MW 7 */
+ 4134 "10111010" // /* MW 6 */
+ 4135 "10010001" // /* MW 5 */
+ 4136 "00011100" // /* MW 4 */
+ 4137 "00100010" // /* MW 3 */
+ 4138 "00111010" // /* MW 2 */
+ 4139 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 708 48 first
+.src_ref 2 "conv2d_bf16_params.h" 713 12 first
+ 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4141 "10101111" // /* MW 9 */
+ 4142 "01000001" // /* MW 8 */
+ 4143 "00000001" // /* MW 7 */
+ 4144 "10000000" // /* MW 6 */
+ 4145 "00110001" // /* MW 5 */
+ 4146 "00011100" // /* MW 4 */
+ 4147 "00100010" // /* MW 3 */
+ 4148 "10111110" // /* MW 2 */
+ 4149 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 709 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+ 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4151 "00000000" // /* MW 5 */
+ 4152 "01010000" // /* MW 4 */
+ 4153 "00110000" // /* MW 3 */
+ 4154 "10001110" // /* MW 2 */
+ 4155 "01000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 710 50 first
+.delay_slot
+ 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4157 "11110001" // /* MW 3 */
+ 4158 "01011100" // /* MW 2 */
+ 4159 "00001010" // /* MW 1 */
+.delay_slot
+ 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4161 "00010001" // /* MW 3 */
+ 4162 "00011100" // /* MW 2 */
+ 4163 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48 first
+.delay_slot
+ 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4165 "01010001" // /* MW 3 */
+ 4166 "00011100" // /* MW 2 */
+ 4167 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 718 48
+.delay_slot
+ 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4169 "01010001" // /* MW 3 */
+ 4170 "00000100" // /* MW 2 */
+ 4171 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 720 50 first
+.src_ref 2 "conv2d_bf16_params.h" 800 first
+.delay_slot
+ 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4173 "01110001" // /* MW 9 */
+ 4174 "00000000" // /* MW 8 */
+ 4175 "00000000" // /* MW 7 */
+ 4176 "00000000" // /* MW 6 */
+ 4177 "11111110" // /* MW 5 */
+ 4178 "00111111" // /* MW 4 */
+ 4179 "00110000" // /* MW 3 */
+ 4180 "10001010" // /* MW 2 */
+.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end
+.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0
+ 4181 "01000010" // /* MW 1 */
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 689 first
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 704 12
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.function_start
+ 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4193 "01111000" // /* MW 11 */
+ 4194 "01100000" // /* MW 10 */
+ 4195 "00001010" // /* MW 9 */
+ 4196 "00001000" // /* MW 8 */
+ 4197 "10000000" // /* MW 7 */
+ 4198 "00000001" // /* MW 6 */
+ 4199 "10001011" // /* MW 5 */
+ 4200 "10000100" // /* MW 4 */
+ 4201 "10000010" // /* MW 3 */
+ 4202 "00000011" // /* MW 2 */
+ 4203 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11
+.src_ref 3 "utils.h" 526 11
+.src_ref 2 "conv2d_bf16.h" 698 28 first
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+ 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4205 "01100000" // /* MW 13 */
+ 4206 "00001001" // /* MW 12 */
+ 4207 "00100000" // /* MW 11 */
+ 4208 "00100001" // /* MW 10 */
+ 4209 "00000000" // /* MW 9 */
+ 4210 "00110110" // /* MW 8 */
+ 4211 "00000001" // /* MW 7 */
+ 4212 "00110100" // /* MW 6 */
+ 4213 "00101000" // /* MW 5 */
+ 4214 "00101000" // /* MW 4 */
+ 4215 "10001000" // /* MW 3 */
+ 4216 "00000110" // /* MW 2 */
+ 4217 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 28
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4219 "00010000" // /* MW 9 */
+ 4220 "00110100" // /* MW 8 */
+ 4221 "00110010" // /* MW 7 */
+ 4222 "11110010" // /* MW 6 */
+ 4223 "00000001" // /* MW 5 */
+ 4224 "00000000" // /* MW 4 */
+ 4225 "11010000" // /* MW 3 */
+ 4226 "10010100" // /* MW 2 */
+ 4227 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 698 43
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+ 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4229 "00010000" // /* MW 9 */
+ 4230 "01111000" // /* MW 8 */
+ 4231 "01111000" // /* MW 7 */
+ 4232 "00000100" // /* MW 6 */
+ 4233 "00000000" // /* MW 5 */
+ 4234 "00000000" // /* MW 4 */
+ 4235 "11010000" // /* MW 3 */
+ 4236 "10010000" // /* MW 2 */
+ 4237 "01100011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 699 43 first
+.src_ref 2 "conv2d_bf16.h" 702 4
+ 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4239 "00010000" // /* MW 9 */
+ 4240 "10010000" // /* MW 8 */
+ 4241 "10111000" // /* MW 7 */
+ 4242 "00000101" // /* MW 6 */
+ 4243 "00000000" // /* MW 5 */
+ 4244 "00000000" // /* MW 4 */
+ 4245 "11010000" // /* MW 3 */
+ 4246 "10000000" // /* MW 2 */
+ 4247 "01100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 702 37 first
+ 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4249 "00000001" // /* MW 5 */
+ 4250 "00000000" // /* MW 4 */
+ 4251 "11010001" // /* MW 3 */
+ 4252 "10000010" // /* MW 2 */
+ 4253 "01111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 37
+ 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4255 "00100010" // /* MW 3 */
+ 4256 "00000100" // /* MW 2 */
+ 4257 "00000100" // /* MW 1 */
+ 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4259 "00000000" // /* MW 1 */
+ 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4261 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+ 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4263 "00000001" // /* MW 5 */
+ 4264 "10000101" // /* MW 4 */
+ 4265 "10000000" // /* MW 3 */
+ 4266 "00001010" // /* MW 2 */
+ 4267 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+ 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4269 "00010100" // /* MW 3 */
+ 4270 "00110000" // /* MW 2 */
+ 4271 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4273 "00010100" // /* MW 3 */
+ 4274 "00010100" // /* MW 2 */
+ 4275 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 702 4 first
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4277 "11111101" // /* MW 5 */
+ 4278 "11100000" // /* MW 4 */
+ 4279 "10001010" // /* MW 3 */
+ 4280 "00001010" // /* MW 2 */
+ 4281 "00000000" // /* MW 1 */
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 707 12
+.src_ref 2 "conv2d_bf16.h" 707 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4283 "00000000" // /* MW 5 */
+ 4284 "11110101" // /* MW 4 */
+ 4285 "10000000" // /* MW 3 */
+ 4286 "00000010" // /* MW 2 */
+ 4287 "11000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4289 "00010100" // /* MW 3 */
+ 4290 "00010100" // /* MW 2 */
+ 4291 "00111100" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4293 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4295 "01111110" // /* MW 9 */
+ 4296 "10100101" // /* MW 8 */
+ 4297 "00000001" // /* MW 7 */
+ 4298 "00000000" // /* MW 6 */
+ 4299 "01010100" // /* MW 5 */
+ 4300 "00000000" // /* MW 4 */
+ 4301 "11110000" // /* MW 3 */
+ 4302 "00101100" // /* MW 2 */
+ 4303 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4305 "00000000" // /* MW 15 */
+ 4306 "00000000" // /* MW 14 */
+ 4307 "01111000" // /* MW 13 */
+ 4308 "11000101" // /* MW 12 */
+ 4309 "00000001" // /* MW 11 */
+ 4310 "00000000" // /* MW 10 */
+ 4311 "00000000" // /* MW 9 */
+ 4312 "00000000" // /* MW 8 */
+ 4313 "01011011" // /* MW 7 */
+ 4314 "00000001" // /* MW 6 */
+ 4315 "00101000" // /* MW 5 */
+ 4316 "01100000" // /* MW 4 */
+ 4317 "11111100" // /* MW 3 */
+ 4318 "00101100" // /* MW 2 */
+ 4319 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4321 "00000000" // /* MW 15 */
+ 4322 "00000000" // /* MW 14 */
+ 4323 "01111000" // /* MW 13 */
+ 4324 "11000101" // /* MW 12 */
+ 4325 "01000000" // /* MW 11 */
+ 4326 "00000000" // /* MW 10 */
+ 4327 "00000000" // /* MW 9 */
+ 4328 "00000000" // /* MW 8 */
+ 4329 "01011011" // /* MW 7 */
+ 4330 "00000001" // /* MW 6 */
+ 4331 "00100000" // /* MW 5 */
+ 4332 "00000000" // /* MW 4 */
+ 4333 "11110000" // /* MW 3 */
+ 4334 "00101100" // /* MW 2 */
+ 4335 "00000000" // /* MW 1 */
+.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144
+.src_ref 2 "conv2d_bf16.h" 704 12 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4337 "00000000" // /* MW 15 */
+ 4338 "00000000" // /* MW 14 */
+ 4339 "01111000" // /* MW 13 */
+ 4340 "10100101" // /* MW 12 */
+ 4341 "00000001" // /* MW 11 */
+ 4342 "00000000" // /* MW 10 */
+ 4343 "00000000" // /* MW 9 */
+ 4344 "00000000" // /* MW 8 */
+ 4345 "01011011" // /* MW 7 */
+ 4346 "00000001" // /* MW 6 */
+ 4347 "00101000" // /* MW 5 */
+ 4348 "00101000" // /* MW 4 */
+ 4349 "11111000" // /* MW 3 */
+ 4350 "00101100" // /* MW 2 */
+ 4351 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 705 66 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4353 "00000000" // /* MW 15 */
+ 4354 "00000000" // /* MW 14 */
+ 4355 "01111000" // /* MW 13 */
+ 4356 "10100101" // /* MW 12 */
+ 4357 "00000001" // /* MW 11 */
+ 4358 "00000000" // /* MW 10 */
+ 4359 "00000000" // /* MW 9 */
+ 4360 "00000000" // /* MW 8 */
+ 4361 "00000011" // /* MW 7 */
+ 4362 "10000000" // /* MW 6 */
+ 4363 "10101101" // /* MW 5 */
+ 4364 "00000000" // /* MW 4 */
+ 4365 "11110000" // /* MW 3 */
+ 4366 "00101100" // /* MW 2 */
+ 4367 "00000000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 3 "utils.h" 526 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4369 "00000000" // /* MW 15 */
+ 4370 "00000000" // /* MW 14 */
+ 4371 "01111000" // /* MW 13 */
+ 4372 "11000101" // /* MW 12 */
+ 4373 "00000001" // /* MW 11 */
+ 4374 "00000000" // /* MW 10 */
+ 4375 "00000000" // /* MW 9 */
+ 4376 "00000000" // /* MW 8 */
+ 4377 "00000011" // /* MW 7 */
+ 4378 "00000000" // /* MW 6 */
+ 4379 "00101001" // /* MW 5 */
+ 4380 "01100000" // /* MW 4 */
+ 4381 "11111100" // /* MW 3 */
+ 4382 "00101100" // /* MW 2 */
+ 4383 "00000000" // /* MW 1 */
+.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 3 "utils.h" 531 4 first
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 4385 "00000000" // /* MW 15 */
+ 4386 "00000000" // /* MW 14 */
+ 4387 "01111000" // /* MW 13 */
+ 4388 "11000101" // /* MW 12 */
+ 4389 "01000000" // /* MW 11 */
+ 4390 "00000000" // /* MW 10 */
+ 4391 "00000000" // /* MW 9 */
+ 4392 "00000000" // /* MW 8 */
+ 4393 "00000011" // /* MW 7 */
+ 4394 "00000000" // /* MW 6 */
+ 4395 "00100011" // /* MW 5 */
+ 4396 "00000000" // /* MW 4 */
+ 4397 "11110000" // /* MW 3 */
+ 4398 "00101100" // /* MW 2 */
+ 4399 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4401 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4403 "00000011" // /* MW 3 */
+ 4404 "10000000" // /* MW 2 */
+ 4405 "00001101" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4407 "01110000" // /* MW 7 */
+ 4408 "11000101" // /* MW 6 */
+ 4409 "00000001" // /* MW 5 */
+ 4410 "00000000" // /* MW 4 */
+ 4411 "01100000" // /* MW 3 */
+ 4412 "00000000" // /* MW 2 */
+ 4413 "00100000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4415 "10001010" // /* MW 3 */
+ 4416 "10000001" // /* MW 2 */
+ 4417 "00011000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4419 "00000011" // /* MW 3 */
+ 4420 "00000000" // /* MW 2 */
+ 4421 "00001011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11 first
+.src_ref 2 "conv2d_bf16.h" 705 30 first
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+ 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4423 "01110000" // /* MW 7 */
+ 4424 "11000101" // /* MW 6 */
+ 4425 "00000001" // /* MW 5 */
+ 4426 "00000000" // /* MW 4 */
+ 4427 "01100000" // /* MW 3 */
+ 4428 "00000000" // /* MW 2 */
+ 4429 "10110000" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 350 11
+.src_ref 2 "conv2d_bf16.h" 706 18 first
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+ 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4431 "01110000" // /* MW 7 */
+ 4432 "11000101" // /* MW 6 */
+ 4433 "01000000" // /* MW 5 */
+ 4434 "00000000" // /* MW 4 */
+ 4435 "01100000" // /* MW 3 */
+ 4436 "00000000" // /* MW 2 */
+ 4437 "00100000" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+ 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4439 "00000011" // /* MW 3 */
+ 4440 "00000000" // /* MW 2 */
+ 4441 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 707 12 first
+.src_ref 2 "conv2d_bf16.h" 707 30 first
+.src_ref 2 "conv2d_bf16.h" 723 first
+ 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 4443 "00000000" // /* MW 5 */
+ 4444 "01010000" // /* MW 4 */
+ 4445 "01100000" // /* MW 3 */
+ 4446 "00000000" // /* MW 2 */
+ 4447 "10110000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 708 12 first
+.delay_slot
+ 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4449 "00000011" // /* MW 3 */
+ 4450 "00000000" // /* MW 2 */
+ 4451 "00001001" // /* MW 1 */
+.src_ref 3 "utils.h" 531 4 first
+.delay_slot
+ 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4453 "00000011" // /* MW 3 */
+ 4454 "00000000" // /* MW 2 */
+ 4455 "00001011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4457 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4459 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end
+.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0
+ 4461 "00000000" // /* MW 1 */
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16.h" 1836 first
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.function_start
+ 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4465 "01100000" // /* MW 13 */
+ 4466 "00010001" // /* MW 12 */
+ 4467 "10010001" // /* MW 11 */
+ 4468 "00001110" // /* MW 10 */
+ 4469 "00000000" // /* MW 9 */
+ 4470 "00000000" // /* MW 8 */
+ 4471 "10000000" // /* MW 7 */
+ 4472 "00000000" // /* MW 6 */
+ 4473 "00100000" // /* MW 5 */
+ 4474 "00111111" // /* MW 4 */
+ 4475 "10000110" // /* MW 3 */
+ 4476 "11100000" // /* MW 2 */
+ 4477 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 241 95
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4479 "01111000" // /* MW 11 */
+ 4480 "01100000" // /* MW 10 */
+ 4481 "00101011" // /* MW 9 */
+ 4482 "00001010" // /* MW 8 */
+ 4483 "11000101" // /* MW 7 */
+ 4484 "10111111" // /* MW 6 */
+ 4485 "10010101" // /* MW 5 */
+ 4486 "11110001" // /* MW 4 */
+ 4487 "00000111" // /* MW 3 */
+ 4488 "01110011" // /* MW 2 */
+ 4489 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1836
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 94
+.src_ref 2 "conv2d_bf16_params.h" 242 100
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 245 28
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4491 "00001000" // /* MW 11 */
+ 4492 "01000111" // /* MW 10 */
+ 4493 "00110100" // /* MW 9 */
+ 4494 "00101001" // /* MW 8 */
+ 4495 "00010000" // /* MW 7 */
+ 4496 "10000001" // /* MW 6 */
+ 4497 "00110101" // /* MW 5 */
+ 4498 "11011010" // /* MW 4 */
+ 4499 "00000111" // /* MW 3 */
+ 4500 "00011001" // /* MW 2 */
+ 4501 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 240 68 first
+ 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4503 "00010000" // /* MW 11 */
+ 4504 "00000000" // /* MW 10 */
+ 4505 "10101000" // /* MW 9 */
+ 4506 "00000011" // /* MW 8 */
+ 4507 "01000000" // /* MW 7 */
+ 4508 "10000000" // /* MW 6 */
+ 4509 "00110101" // /* MW 5 */
+ 4510 "11110101" // /* MW 4 */
+ 4511 "11010111" // /* MW 3 */
+ 4512 "11001010" // /* MW 2 */
+ 4513 "01000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.src_ref 2 "conv2d_bf16_params.h" 245 20
+ 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4515 "10010000" // /* MW 11 */
+ 4516 "11111111" // /* MW 10 */
+ 4517 "11101111" // /* MW 9 */
+ 4518 "11111111" // /* MW 8 */
+ 4519 "01111111" // /* MW 7 */
+ 4520 "10000000" // /* MW 6 */
+ 4521 "11010101" // /* MW 5 */
+ 4522 "11111101" // /* MW 4 */
+ 4523 "10000111" // /* MW 3 */
+ 4524 "00011000" // /* MW 2 */
+ 4525 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4527 "01011000" // /* MW 11 */
+ 4528 "11101100" // /* MW 10 */
+ 4529 "00000111" // /* MW 9 */
+ 4530 "00001010" // /* MW 8 */
+ 4531 "01100001" // /* MW 7 */
+ 4532 "10000001" // /* MW 6 */
+ 4533 "10110101" // /* MW 5 */
+ 4534 "11100001" // /* MW 4 */
+ 4535 "00000111" // /* MW 3 */
+ 4536 "10110100" // /* MW 2 */
+ 4537 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.src_ref 2 "conv2d_bf16_params.h" 250 71
+ 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4539 "01011000" // /* MW 11 */
+ 4540 "11000100" // /* MW 10 */
+ 4541 "10000111" // /* MW 9 */
+ 4542 "11001010" // /* MW 8 */
+ 4543 "01110111" // /* MW 7 */
+ 4544 "10000111" // /* MW 6 */
+ 4545 "11110101" // /* MW 5 */
+ 4546 "11101101" // /* MW 4 */
+ 4547 "00000111" // /* MW 3 */
+ 4548 "10010101" // /* MW 2 */
+ 4549 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44
+ 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4551 "01010000" // /* MW 7 */
+ 4552 "01000000" // /* MW 6 */
+ 4553 "10000000" // /* MW 5 */
+ 4554 "00000011" // /* MW 4 */
+ 4555 "10110000" // /* MW 3 */
+ 4556 "01110011" // /* MW 2 */
+ 4557 "11111111" // /* MW 1 */
+ 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4559 "00111101" // /* MW 3 */
+ 4560 "11100100" // /* MW 2 */
+ 4561 "00001111" // /* MW 1 */
+ 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4563 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+ 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4565 "00100000" // /* MW 3 */
+ 4566 "01011001" // /* MW 2 */
+ 4567 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 240 68
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+ 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4569 "10011011" // /* MW 5 */
+ 4570 "01110111" // /* MW 4 */
+ 4571 "00110110" // /* MW 3 */
+ 4572 "00110010" // /* MW 2 */
+ 4573 "01000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54 first
+.src_ref 2 "conv2d_bf16_params.h" 242 94 first
+ 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4575 "00101111" // /* MW 5 */
+ 4576 "11110010" // /* MW 4 */
+ 4577 "01011110" // /* MW 3 */
+ 4578 "11111001" // /* MW 2 */
+ 4579 "01011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 20 first
+ 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4581 "00101010" // /* MW 3 */
+ 4582 "11001001" // /* MW 2 */
+ 4583 "00000010" // /* MW 1 */
+ 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4585 "00000000" // /* MW 1 */
+ 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4587 "00000000" // /* MW 1 */
+ 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4589 "00000000" // /* MW 1 */
+ 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4591 "00000000" // /* MW 1 */
+ 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4593 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 174 first
+ 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4595 "11101100" // /* MW 3 */
+ 4596 "01110111" // /* MW 2 */
+ 4597 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+ 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4599 "00110010" // /* MW 3 */
+ 4600 "01011101" // /* MW 2 */
+ 4601 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 171
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4603 "11001100" // /* MW 3 */
+ 4604 "11110110" // /* MW 2 */
+ 4605 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 241 95 first
+.src_ref 2 "conv2d_bf16_params.h" 242 39
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4607 "11001111" // /* MW 5 */
+ 4608 "10110111" // /* MW 4 */
+ 4609 "11101110" // /* MW 3 */
+ 4610 "01110000" // /* MW 2 */
+ 4611 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 100 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4613 "00011101" // /* MW 3 */
+ 4614 "01111111" // /* MW 2 */
+ 4615 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 153
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4617 "11100010" // /* MW 3 */
+ 4618 "01011000" // /* MW 2 */
+ 4619 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 98
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4621 "11000101" // /* MW 3 */
+ 4622 "11111001" // /* MW 2 */
+ 4623 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 54
+.src_ref 2 "conv2d_bf16_params.h" 242 151
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4625 "01100010" // /* MW 5 */
+ 4626 "00111100" // /* MW 4 */
+ 4627 "10011110" // /* MW 3 */
+ 4628 "11111101" // /* MW 2 */
+ 4629 "10001110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 41
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4631 "11000010" // /* MW 3 */
+ 4632 "01111001" // /* MW 2 */
+ 4633 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+ 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4635 "11001100" // /* MW 3 */
+ 4636 "01111111" // /* MW 2 */
+ 4637 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 242 117 first
+.src_ref 2 "conv2d_bf16_params.h" 243 39
+ 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4639 "11010001" // /* MW 5 */
+ 4640 "11110111" // /* MW 4 */
+ 4641 "00111110" // /* MW 3 */
+ 4642 "01111110" // /* MW 2 */
+ 4643 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 44 first
+.src_ref 2 "conv2d_bf16_params.h" 245 28 first
+ 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4645 "00110001" // /* MW 5 */
+ 4646 "10110010" // /* MW 4 */
+ 4647 "01010100" // /* MW 3 */
+ 4648 "01111001" // /* MW 2 */
+ 4649 "01011101" // /* MW 1 */
+ 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4651 "00000000" // /* MW 1 */
+ 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4653 "00000000" // /* MW 1 */
+ 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4655 "00000000" // /* MW 1 */
+ 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4657 "00000000" // /* MW 1 */
+ 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4659 "00000000" // /* MW 1 */
+ 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4661 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 82
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+ 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4663 "11111100" // /* MW 5 */
+ 4664 "10111110" // /* MW 4 */
+ 4665 "00011111" // /* MW 3 */
+ 4666 "10101101" // /* MW 2 */
+ 4667 "11110101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4669 "00000001" // /* MW 5 */
+ 4670 "01000000" // /* MW 4 */
+ 4671 "01000000" // /* MW 3 */
+ 4672 "00001001" // /* MW 2 */
+ 4673 "01100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 156
+.delay_slot
+ 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4675 "01001000" // /* MW 3 */
+ 4676 "10010011" // /* MW 2 */
+ 4677 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4679 "10010000" // /* MW 3 */
+ 4680 "11111110" // /* MW 2 */
+ 4681 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4683 "01100100" // /* MW 3 */
+ 4684 "01101101" // /* MW 2 */
+ 4685 "00010010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 87
+.delay_slot
+ 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4687 "01111100" // /* MW 3 */
+ 4688 "11101111" // /* MW 2 */
+ 4689 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 244 132
+.delay_slot
+ 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4691 "01100100" // /* MW 3 */
+ 4692 "11100001" // /* MW 2 */
+ 4693 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 245 33
+ 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */
+ 4695 "00000001" // /* MW 5 */
+ 4696 "01000000" // /* MW 4 */
+ 4697 "01000000" // /* MW 3 */
+ 4698 "00001001" // /* MW 2 */
+ 4699 "11101000" // /* MW 1 */
+.delay_slot
+ 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4701 "00011101" // /* MW 3 */
+ 4702 "11101011" // /* MW 2 */
+ 4703 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4705 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4707 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4709 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4711 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 876 51
+ 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */
+ 4713 "00100000" // /* MW 9 */
+ 4714 "00000000" // /* MW 8 */
+ 4715 "00000000" // /* MW 7 */
+ 4716 "01010110" // /* MW 6 */
+ 4717 "00000010" // /* MW 5 */
+ 4718 "00000000" // /* MW 4 */
+ 4719 "00000000" // /* MW 3 */
+ 4720 "00111011" // /* MW 2 */
+ 4721 "00000000" // /* MW 1 */
+.delay_slot
+ 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4723 "10011100" // /* MW 3 */
+ 4724 "00011001" // /* MW 2 */
+ 4725 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1849 12
+.delay_slot
+ 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4727 "00000101" // /* MW 3 */
+ 4728 "00100110" // /* MW 2 */
+ 4729 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4731 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4733 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4735 "00000000" // /* MW 1 */
+.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params
+.src_ref 2 "conv2d_bf16_params.h" 250 71 first
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4737 "01111000" // /* MW 11 */
+ 4738 "11001110" // /* MW 10 */
+ 4739 "00001100" // /* MW 9 */
+ 4740 "00111100" // /* MW 8 */
+ 4741 "10111111" // /* MW 7 */
+ 4742 "10101011" // /* MW 6 */
+ 4743 "00011101" // /* MW 5 */
+ 4744 "11101011" // /* MW 4 */
+ 4745 "00000111" // /* MW 3 */
+ 4746 "10010101" // /* MW 2 */
+ 4747 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 101
+ 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4749 "01011101" // /* MW 3 */
+ 4750 "10101011" // /* MW 2 */
+ 4751 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+ 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4753 "10010010" // /* MW 3 */
+ 4754 "01101011" // /* MW 2 */
+ 4755 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 129
+ 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4757 "11100111" // /* MW 3 */
+ 4758 "11110111" // /* MW 2 */
+ 4759 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 106
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4761 "01000001" // /* MW 5 */
+ 4762 "10110000" // /* MW 4 */
+ 4763 "01001101" // /* MW 3 */
+ 4764 "11110010" // /* MW 2 */
+ 4765 "10101100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 133
+ 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4767 "00110010" // /* MW 3 */
+ 4768 "01100111" // /* MW 2 */
+ 4769 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87 first
+ 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4771 "01000100" // /* MW 3 */
+ 4772 "00101001" // /* MW 2 */
+ 4773 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 249 87
+ 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4775 "11110000" // /* MW 3 */
+ 4776 "00110110" // /* MW 2 */
+ 4777 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 250 152 first
+ 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4779 "10001011" // /* MW 5 */
+ 4780 "11001111" // /* MW 4 */
+ 4781 "11111001" // /* MW 3 */
+ 4782 "00101100" // /* MW 2 */
+ 4783 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320
+.src_ref 2 "conv2d_bf16_params.h" 258 8 first
+ 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */
+ 4785 "01100000" // /* MW 11 */
+ 4786 "00000000" // /* MW 10 */
+ 4787 "00010000" // /* MW 9 */
+ 4788 "01011100" // /* MW 8 */
+ 4789 "00000010" // /* MW 7 */
+ 4790 "10111010" // /* MW 6 */
+ 4791 "01110001" // /* MW 5 */
+ 4792 "01101111" // /* MW 4 */
+ 4793 "10000010" // /* MW 3 */
+ 4794 "10010000" // /* MW 2 */
+ 4795 "00000001" // /* MW 1 */
+.delay_slot
+ 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4797 "01100111" // /* MW 3 */
+ 4798 "10001010" // /* MW 2 */
+ 4799 "00000010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4801 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4803 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4805 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4807 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+ 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4809 "11111110" // /* MW 5 */
+ 4810 "00111111" // /* MW 4 */
+ 4811 "11111010" // /* MW 3 */
+ 4812 "11111111" // /* MW 2 */
+ 4813 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71 first
+ 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4815 "01000100" // /* MW 3 */
+ 4816 "10100101" // /* MW 2 */
+ 4817 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16_params.h" 259 71
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 4819 "00011100" // /* MW 13 */
+ 4820 "00000000" // /* MW 12 */
+ 4821 "00000000" // /* MW 11 */
+ 4822 "01010111" // /* MW 10 */
+ 4823 "00011010" // /* MW 9 */
+ 4824 "01000000" // /* MW 8 */
+ 4825 "00000000" // /* MW 7 */
+ 4826 "00000000" // /* MW 6 */
+ 4827 "10100011" // /* MW 5 */
+ 4828 "11101100" // /* MW 4 */
+ 4829 "11110110" // /* MW 3 */
+ 4830 "00101100" // /* MW 2 */
+ 4831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368
+.src_ref 2 "conv2d_bf16.h" 1841 65 first
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16.h" 1849 12 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4833 "01011000" // /* MW 9 */
+ 4834 "11111101" // /* MW 8 */
+ 4835 "11001111" // /* MW 7 */
+ 4836 "10000010" // /* MW 6 */
+ 4837 "01000100" // /* MW 5 */
+ 4838 "00100111" // /* MW 4 */
+ 4839 "11010000" // /* MW 3 */
+ 4840 "11010010" // /* MW 2 */
+ 4841 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 1841 34
+.src_ref 2 "conv2d_bf16.h" 1842 36
+.src_ref 2 "conv2d_bf16.h" 1842 67
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 4843 "01011000" // /* MW 9 */
+ 4844 "00100100" // /* MW 8 */
+ 4845 "00000000" // /* MW 7 */
+ 4846 "11111010" // /* MW 6 */
+ 4847 "01011111" // /* MW 5 */
+ 4848 "00101001" // /* MW 4 */
+ 4849 "00000000" // /* MW 3 */
+ 4850 "01010010" // /* MW 2 */
+ 4851 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 67 first
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1849 4
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4853 "01011000" // /* MW 11 */
+ 4854 "11001100" // /* MW 10 */
+ 4855 "00000111" // /* MW 9 */
+ 4856 "00100110" // /* MW 8 */
+ 4857 "01101011" // /* MW 7 */
+ 4858 "10101011" // /* MW 6 */
+ 4859 "00101101" // /* MW 5 */
+ 4860 "11010000" // /* MW 4 */
+ 4861 "11010111" // /* MW 3 */
+ 4862 "01011010" // /* MW 2 */
+ 4863 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52
+.src_ref 2 "conv2d_bf16.h" 1842 106
+.src_ref 2 "conv2d_bf16.h" 1845 80
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 4865 "01011000" // /* MW 11 */
+ 4866 "11000100" // /* MW 10 */
+ 4867 "00000000" // /* MW 9 */
+ 4868 "11101010" // /* MW 8 */
+ 4869 "00110111" // /* MW 7 */
+ 4870 "10111111" // /* MW 6 */
+ 4871 "11010101" // /* MW 5 */
+ 4872 "11011110" // /* MW 4 */
+ 4873 "11010111" // /* MW 3 */
+ 4874 "01011110" // /* MW 2 */
+ 4875 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 63 first
+ 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4877 "10110110" // /* MW 3 */
+ 4878 "11111111" // /* MW 2 */
+ 4879 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 862 52 first
+ 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4881 "11110110" // /* MW 3 */
+ 4882 "10001011" // /* MW 2 */
+ 4883 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4885 "10110110" // /* MW 3 */
+ 4886 "00000110" // /* MW 2 */
+ 4887 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+ 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4889 "01011011" // /* MW 5 */
+ 4890 "00100110" // /* MW 4 */
+ 4891 "11011010" // /* MW 3 */
+ 4892 "11010010" // /* MW 2 */
+ 4893 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4895 "11010110" // /* MW 3 */
+ 4896 "00000111" // /* MW 2 */
+ 4897 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+ 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4899 "00101101" // /* MW 3 */
+ 4900 "10101101" // /* MW 2 */
+ 4901 "00010101" // /* MW 1 */
+ 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4903 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 80 first
+ 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4905 "00111110" // /* MW 3 */
+ 4906 "01100111" // /* MW 2 */
+ 4907 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 21 first
+ 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4909 "00011000" // /* MW 3 */
+ 4910 "11100011" // /* MW 2 */
+ 4911 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 866 12
+ 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */
+ 4913 "00000001" // /* MW 5 */
+ 4914 "01000000" // /* MW 4 */
+ 4915 "11010000" // /* MW 3 */
+ 4916 "00001001" // /* MW 2 */
+ 4917 "10001000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 36 first
+.src_ref 2 "conv2d_bf16.h" 1842 75 first
+.delay_slot
+ 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4919 "10110010" // /* MW 5 */
+ 4920 "10110101" // /* MW 4 */
+ 4921 "10111010" // /* MW 3 */
+ 4922 "10100101" // /* MW 2 */
+ 4923 "10111101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1842 75
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4925 "10110010" // /* MW 5 */
+ 4926 "10010101" // /* MW 4 */
+ 4927 "10110000" // /* MW 3 */
+ 4928 "01100101" // /* MW 2 */
+ 4929 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1841 34 first
+.delay_slot
+ 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 4931 "10100000" // /* MW 7 */
+ 4932 "01101000" // /* MW 6 */
+ 4933 "11001010" // /* MW 5 */
+ 4934 "00000001" // /* MW 4 */
+ 4935 "10110000" // /* MW 3 */
+ 4936 "10000100" // /* MW 2 */
+ 4937 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4939 "10000000" // /* MW 3 */
+ 4940 "11010000" // /* MW 2 */
+ 4941 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1845 31 first
+.delay_slot
+ 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4943 "11111001" // /* MW 3 */
+ 4944 "01101010" // /* MW 2 */
+ 4945 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4947 "11010000" // /* MW 5 */
+ 4948 "11001000" // /* MW 4 */
+ 4949 "11001110" // /* MW 3 */
+ 4950 "00000111" // /* MW 2 */
+ 4951 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18 first
+ 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4953 "10000000" // /* MW 5 */
+ 4954 "10110100" // /* MW 4 */
+ 4955 "01010000" // /* MW 3 */
+ 4956 "11000100" // /* MW 2 */
+ 4957 "11100000" // /* MW 1 */
+ 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4959 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4961 "00000000" // /* MW 5 */
+ 4962 "00100000" // /* MW 4 */
+ 4963 "00001010" // /* MW 3 */
+ 4964 "01111111" // /* MW 2 */
+ 4965 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4967 "10010001" // /* MW 3 */
+ 4968 "00000010" // /* MW 2 */
+ 4969 "00011000" // /* MW 1 */
+ 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4971 "11100000" // /* MW 3 */
+ 4972 "00010101" // /* MW 2 */
+ 4973 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4975 "01011111" // /* MW 3 */
+ 4976 "01101010" // /* MW 2 */
+ 4977 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 4979 "00100101" // /* MW 5 */
+ 4980 "00000001" // /* MW 4 */
+ 4981 "11100000" // /* MW 3 */
+ 4982 "11000110" // /* MW 2 */
+ 4983 "11100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4985 "10000000" // /* MW 3 */
+ 4986 "01111010" // /* MW 2 */
+ 4987 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4989 "00010110" // /* MW 3 */
+ 4990 "01000000" // /* MW 2 */
+ 4991 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4993 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 4995 "00000001" // /* MW 3 */
+ 4996 "01000001" // /* MW 2 */
+ 4997 "00011100" // /* MW 1 */
+ 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 4999 "00000000" // /* MW 1 */
+ 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5001 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 867 18
+ 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5003 "00110010" // /* MW 3 */
+ 5004 "00000110" // /* MW 2 */
+ 5005 "00000111" // /* MW 1 */
+ 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5007 "00000000" // /* MW 1 */
+ 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5009 "00000000" // /* MW 1 */
+ 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5011 "00000000" // /* MW 1 */
+ 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5013 "00000000" // /* MW 1 */
+ 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5015 "00000000" // /* MW 1 */
+ 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5017 "00000000" // /* MW 1 */
+ 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5019 "01101011" // /* MW 5 */
+ 5020 "10100100" // /* MW 4 */
+ 5021 "11111111" // /* MW 3 */
+ 5022 "00101100" // /* MW 2 */
+ 5023 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560
+.src_ref 2 "conv2d_bf16.h" 881 76
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5025 "00010000" // /* MW 11 */
+ 5026 "00110100" // /* MW 10 */
+ 5027 "10110010" // /* MW 9 */
+ 5028 "11110001" // /* MW 8 */
+ 5029 "00000001" // /* MW 7 */
+ 5030 "00000000" // /* MW 6 */
+ 5031 "00001011" // /* MW 5 */
+ 5032 "10001110" // /* MW 4 */
+ 5033 "10000001" // /* MW 3 */
+ 5034 "10010000" // /* MW 2 */
+ 5035 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.src_ref 2 "conv2d_bf16.h" 876 51 first
+.src_ref 2 "conv2d_bf16.h" 881 76 first
+.src_ref 2 "conv2d_bf16.h" 883 4
+.src_ref 2 "conv2d_bf16.h" 884 4
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5037 "01011000" // /* MW 11 */
+ 5038 "00001011" // /* MW 10 */
+ 5039 "01101000" // /* MW 9 */
+ 5040 "10010010" // /* MW 8 */
+ 5041 "00011001" // /* MW 7 */
+ 5042 "00110011" // /* MW 6 */
+ 5043 "10001011" // /* MW 5 */
+ 5044 "10000100" // /* MW 4 */
+ 5045 "01010000" // /* MW 3 */
+ 5046 "01000101" // /* MW 2 */
+ 5047 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5049 "01111000" // /* MW 9 */
+ 5050 "01100000" // /* MW 8 */
+ 5051 "10101010" // /* MW 7 */
+ 5052 "01100101" // /* MW 6 */
+ 5053 "10111001" // /* MW 5 */
+ 5054 "00111001" // /* MW 4 */
+ 5055 "00000000" // /* MW 3 */
+ 5056 "10010110" // /* MW 2 */
+ 5057 "01100001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 883 4 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5059 "01100111" // /* MW 3 */
+ 5060 "00000110" // /* MW 2 */
+ 5061 "00000011" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5063 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 884 4 first
+.aggressive_scheduled_block_id 4
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5065 "00000001" // /* MW 5 */
+ 5066 "00000000" // /* MW 4 */
+ 5067 "00110000" // /* MW 3 */
+ 5068 "00001000" // /* MW 2 */
+ 5069 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12 first
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5071 "00101101" // /* MW 3 */
+ 5072 "01101011" // /* MW 2 */
+ 5073 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 876 12
+.delay_slot
+ 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5075 "11111001" // /* MW 3 */
+ 5076 "01101010" // /* MW 2 */
+ 5077 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45 first
+.delay_slot
+ 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5079 "00010001" // /* MW 3 */
+ 5080 "01100011" // /* MW 2 */
+ 5081 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80
+.delay_slot
+ 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5083 "00110101" // /* MW 5 */
+ 5084 "00101100" // /* MW 4 */
+ 5085 "10111010" // /* MW 3 */
+ 5086 "01100101" // /* MW 2 */
+ 5087 "10001100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 881 45
+.src_ref 2 "conv2d_bf16_params.h" 243 80 first
+.delay_slot
+ 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5089 "00000000" // /* MW 15 */
+ 5090 "00000000" // /* MW 14 */
+ 5091 "10101000" // /* MW 13 */
+ 5092 "11100010" // /* MW 12 */
+ 5093 "10001011" // /* MW 11 */
+ 5094 "00010001" // /* MW 10 */
+ 5095 "10011010" // /* MW 9 */
+ 5096 "00101100" // /* MW 8 */
+ 5097 "01011011" // /* MW 7 */
+ 5098 "00000001" // /* MW 6 */
+ 5099 "00100000" // /* MW 5 */
+ 5100 "00000000" // /* MW 4 */
+ 5101 "11110000" // /* MW 3 */
+ 5102 "00101100" // /* MW 2 */
+ 5103 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.return_address
+ 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5105 "10011001" // /* MW 3 */
+ 5106 "11010100" // /* MW 2 */
+ 5107 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4 first
+.no_stack_arguments
+ 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */
+ 5109 "00000001" // /* MW 5 */
+ 5110 "00000000" // /* MW 4 */
+ 5111 "00110000" // /* MW 3 */
+ 5112 "00001000" // /* MW 2 */
+ 5113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5117 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 44
+.delay_slot
+ 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5119 "10010000" // /* MW 3 */
+ 5120 "01010110" // /* MW 2 */
+ 5121 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5123 "10100000" // /* MW 3 */
+ 5124 "01100110" // /* MW 2 */
+ 5125 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 885 4
+.delay_slot
+ 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5127 "00000000" // /* MW 9 */
+ 5128 "00000000" // /* MW 8 */
+ 5129 "00000000" // /* MW 7 */
+ 5130 "00000000" // /* MW 6 */
+ 5131 "00001011" // /* MW 5 */
+ 5132 "10001111" // /* MW 4 */
+ 5133 "11110000" // /* MW 3 */
+ 5134 "00101100" // /* MW 2 */
+ 5135 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.src_ref 2 "conv2d_bf16.h" 1115 26
+.return_address
+ 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5137 "00001000" // /* MW 9 */
+ 5138 "01100011" // /* MW 8 */
+ 5139 "00110011" // /* MW 7 */
+ 5140 "11101010" // /* MW 6 */
+ 5141 "00110111" // /* MW 5 */
+ 5142 "00000001" // /* MW 4 */
+ 5143 "10000000" // /* MW 3 */
+ 5144 "10011010" // /* MW 2 */
+ 5145 "11010110" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 886 4
+.src_ref 2 "conv2d_bf16.h" 896 23 first
+.src_ref 2 "conv2d_bf16.h" 1123 71
+ 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5147 "01100010" // /* MW 5 */
+ 5148 "00110100" // /* MW 4 */
+ 5149 "11010000" // /* MW 3 */
+ 5150 "10000100" // /* MW 2 */
+ 5151 "10000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5153 "01000110" // /* MW 3 */
+ 5154 "00011100" // /* MW 2 */
+ 5155 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5157 "00100110" // /* MW 3 */
+ 5158 "00011110" // /* MW 2 */
+ 5159 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5161 "01000110" // /* MW 3 */
+ 5162 "00011110" // /* MW 2 */
+ 5163 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5165 "00000110" // /* MW 3 */
+ 5166 "00011100" // /* MW 2 */
+ 5167 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5169 "01100110" // /* MW 3 */
+ 5170 "00011100" // /* MW 2 */
+ 5171 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 896 23
+ 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5173 "01100110" // /* MW 3 */
+ 5174 "00011110" // /* MW 2 */
+ 5175 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23 first
+ 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5177 "11010110" // /* MW 3 */
+ 5178 "00011110" // /* MW 2 */
+ 5179 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5181 "00110110" // /* MW 3 */
+ 5182 "00011110" // /* MW 2 */
+ 5183 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5185 "10010110" // /* MW 3 */
+ 5186 "00011111" // /* MW 2 */
+ 5187 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5189 "10110110" // /* MW 3 */
+ 5190 "00011110" // /* MW 2 */
+ 5191 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5193 "11110110" // /* MW 3 */
+ 5194 "00011110" // /* MW 2 */
+ 5195 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5197 "10011110" // /* MW 3 */
+ 5198 "00011101" // /* MW 2 */
+ 5199 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 897 23
+ 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5201 "00100110" // /* MW 3 */
+ 5202 "00011101" // /* MW 2 */
+ 5203 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5205 "10100110" // /* MW 3 */
+ 5206 "00011100" // /* MW 2 */
+ 5207 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5209 "11000110" // /* MW 3 */
+ 5210 "00011100" // /* MW 2 */
+ 5211 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5213 "10100110" // /* MW 3 */
+ 5214 "00011110" // /* MW 2 */
+ 5215 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5217 "11010110" // /* MW 3 */
+ 5218 "00011111" // /* MW 2 */
+ 5219 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5221 "10110110" // /* MW 3 */
+ 5222 "00011111" // /* MW 2 */
+ 5223 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22
+ 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5225 "11100110" // /* MW 3 */
+ 5226 "00011100" // /* MW 2 */
+ 5227 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5229 "01001010" // /* MW 3 */
+ 5230 "11000010" // /* MW 2 */
+ 5231 "00000100" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25
+ 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5233 "10010001" // /* MW 3 */
+ 5234 "11010010" // /* MW 2 */
+ 5235 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 898 22 first
+ 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5237 "01010110" // /* MW 3 */
+ 5238 "00000100" // /* MW 2 */
+ 5239 "00000100" // /* MW 1 */
+ 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5241 "00000000" // /* MW 1 */
+ 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5243 "00000000" // /* MW 1 */
+ 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5245 "00000000" // /* MW 1 */
+ 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5247 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 26 first
+ 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5249 "00101100" // /* MW 3 */
+ 5250 "11100111" // /* MW 2 */
+ 5251 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1115 12
+ 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */
+ 5253 "00000001" // /* MW 5 */
+ 5254 "01000000" // /* MW 4 */
+ 5255 "00010000" // /* MW 3 */
+ 5256 "00001100" // /* MW 2 */
+ 5257 "10011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4
+.delay_slot
+ 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5259 "11010000" // /* MW 5 */
+ 5260 "11001000" // /* MW 4 */
+ 5261 "11000100" // /* MW 3 */
+ 5262 "00000111" // /* MW 2 */
+ 5263 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 886 4 first
+.delay_slot
+ 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5265 "10100111" // /* MW 3 */
+ 5266 "00000101" // /* MW 2 */
+ 5267 "00000010" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5269 "01110010" // /* MW 3 */
+ 5270 "11010001" // /* MW 2 */
+ 5271 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5273 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 5275 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1123 71 first
+ 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5277 "01011000" // /* MW 9 */
+ 5278 "10000100" // /* MW 8 */
+ 5279 "10000000" // /* MW 7 */
+ 5280 "00111111" // /* MW 6 */
+ 5281 "10111001" // /* MW 5 */
+ 5282 "00011011" // /* MW 4 */
+ 5283 "00100000" // /* MW 3 */
+ 5284 "01000011" // /* MW 2 */
+ 5285 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+.src_ref 2 "conv2d_bf16.h" 1154 80
+ 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5287 "01011000" // /* MW 9 */
+ 5288 "00111100" // /* MW 8 */
+ 5289 "00000000" // /* MW 7 */
+ 5290 "00001010" // /* MW 6 */
+ 5291 "00100000" // /* MW 5 */
+ 5292 "00111101" // /* MW 4 */
+ 5293 "00000000" // /* MW 3 */
+ 5294 "00010011" // /* MW 2 */
+ 5295 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5297 "01111000" // /* MW 9 */
+ 5298 "11010000" // /* MW 8 */
+ 5299 "11100100" // /* MW 7 */
+ 5300 "00001011" // /* MW 6 */
+ 5301 "10100000" // /* MW 5 */
+ 5302 "00000001" // /* MW 4 */
+ 5303 "10000000" // /* MW 3 */
+ 5304 "00010100" // /* MW 2 */
+ 5305 "11111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 746 83
+ 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5307 "01111000" // /* MW 11 */
+ 5308 "11000000" // /* MW 10 */
+ 5309 "10100111" // /* MW 9 */
+ 5310 "00000001" // /* MW 8 */
+ 5311 "11010100" // /* MW 7 */
+ 5312 "00011011" // /* MW 6 */
+ 5313 "01001011" // /* MW 5 */
+ 5314 "00011100" // /* MW 4 */
+ 5315 "10000010" // /* MW 3 */
+ 5316 "10011000" // /* MW 2 */
+ 5317 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.src_ref 2 "conv2d_bf16.h" 1199 26
+.src_ref 2 "conv2d_bf16.h" 1200 26
+.src_ref 2 "conv2d_bf16.h" 1201 26
+.src_ref 2 "conv2d_bf16.h" 1202 26
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id first
+ 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5319 "01011000" // /* MW 11 */
+ 5320 "00000111" // /* MW 10 */
+ 5321 "11101000" // /* MW 9 */
+ 5322 "10001001" // /* MW 8 */
+ 5323 "11110111" // /* MW 7 */
+ 5324 "00000001" // /* MW 6 */
+ 5325 "01001011" // /* MW 5 */
+ 5326 "00011100" // /* MW 4 */
+ 5327 "00100110" // /* MW 3 */
+ 5328 "10010110" // /* MW 2 */
+ 5329 "11111010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 12
+.src_ref 2 "conv2d_bf16.h" 1218 20
+.aggressive_scheduled_block_id 5
+.noswbrkpt
+ 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5331 "00010000" // /* MW 9 */
+ 5332 "10100000" // /* MW 8 */
+ 5333 "00110010" // /* MW 7 */
+ 5334 "00000101" // /* MW 6 */
+ 5335 "00000000" // /* MW 5 */
+ 5336 "00000000" // /* MW 4 */
+ 5337 "00100000" // /* MW 3 */
+ 5338 "11001010" // /* MW 2 */
+ 5339 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 749 26
+.src_ref 2 "conv2d_bf16.h" 750 26
+.src_ref 2 "conv2d_bf16.h" 751 26
+.src_ref 2 "conv2d_bf16.h" 752 26
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.aggressive_scheduled_block_id 5
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5341 "01011000" // /* MW 9 */
+ 5342 "00001100" // /* MW 8 */
+ 5343 "10001011" // /* MW 7 */
+ 5344 "00010010" // /* MW 6 */
+ 5345 "01101001" // /* MW 5 */
+ 5346 "00110100" // /* MW 4 */
+ 5347 "00100000" // /* MW 3 */
+ 5348 "00110110" // /* MW 2 */
+ 5349 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1123 47
+.src_ref 2 "conv2d_bf16.h" 1873
+ 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5351 "01011000" // /* MW 11 */
+ 5352 "00000000" // /* MW 10 */
+ 5353 "00001000" // /* MW 9 */
+ 5354 "00001011" // /* MW 8 */
+ 5355 "10010000" // /* MW 7 */
+ 5356 "00000001" // /* MW 6 */
+ 5357 "00100000" // /* MW 5 */
+ 5358 "11010111" // /* MW 4 */
+ 5359 "00101001" // /* MW 3 */
+ 5360 "10000111" // /* MW 2 */
+ 5361 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5363 "00010110" // /* MW 3 */
+ 5364 "10001000" // /* MW 2 */
+ 5365 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5367 "00100110" // /* MW 3 */
+ 5368 "10101011" // /* MW 2 */
+ 5369 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16
+ 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5371 "01110110" // /* MW 3 */
+ 5372 "00101111" // /* MW 2 */
+ 5373 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 80 first
+ 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5375 "10000110" // /* MW 3 */
+ 5376 "00011110" // /* MW 2 */
+ 5377 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 80 first
+ 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5379 "11000110" // /* MW 3 */
+ 5380 "10001010" // /* MW 2 */
+ 5381 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 87 first
+ 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5383 "00000110" // /* MW 3 */
+ 5384 "10011110" // /* MW 2 */
+ 5385 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 83 first
+ 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5387 "00110110" // /* MW 3 */
+ 5388 "00011100" // /* MW 2 */
+ 5389 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 83 first
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5391 "00000010" // /* MW 5 */
+ 5392 "00000110" // /* MW 4 */
+ 5393 "11011101" // /* MW 3 */
+ 5394 "00000010" // /* MW 2 */
+ 5395 "10011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1143 66 first
+ 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5397 "01110110" // /* MW 3 */
+ 5398 "00010100" // /* MW 2 */
+ 5399 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1206 63 first
+ 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5401 "10010110" // /* MW 3 */
+ 5402 "00000100" // /* MW 2 */
+ 5403 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89
+ 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5405 "00000000" // /* MW 3 */
+ 5406 "11011010" // /* MW 2 */
+ 5407 "00011001" // /* MW 1 */
+ 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5409 "10011001" // /* MW 3 */
+ 5410 "10000011" // /* MW 2 */
+ 5411 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89
+ 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5413 "00000000" // /* MW 3 */
+ 5414 "00011011" // /* MW 2 */
+ 5415 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1125 16 first
+ 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5417 "10011001" // /* MW 3 */
+ 5418 "00001101" // /* MW 2 */
+ 5419 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1149 89 first
+ 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5421 "11100000" // /* MW 3 */
+ 5422 "00000011" // /* MW 2 */
+ 5423 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1154 89 first
+ 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5425 "11000000" // /* MW 5 */
+ 5426 "00010000" // /* MW 4 */
+ 5427 "11101110" // /* MW 3 */
+ 5428 "11111111" // /* MW 2 */
+ 5429 "00011000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5431 "01111110" // /* MW 9 */
+ 5432 "10000000" // /* MW 8 */
+ 5433 "10000010" // /* MW 7 */
+ 5434 "00000000" // /* MW 6 */
+ 5435 "00010000" // /* MW 5 */
+ 5436 "00000000" // /* MW 4 */
+ 5437 "11110000" // /* MW 3 */
+ 5438 "00101100" // /* MW 2 */
+ 5439 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 738 8
+.src_ref 2 "conv2d_bf16.h" 1147 31 first
+.src_ref 2 "conv2d_bf16.h" 1187 40 first
+.loop_nesting 1
+ 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5441 "01111000" // /* MW 11 */
+ 5442 "10010000" // /* MW 10 */
+ 5443 "00110011" // /* MW 9 */
+ 5444 "11101100" // /* MW 8 */
+ 5445 "11100111" // /* MW 7 */
+ 5446 "00000100" // /* MW 6 */
+ 5447 "00001011" // /* MW 5 */
+ 5448 "10000101" // /* MW 4 */
+ 5449 "01110001" // /* MW 3 */
+ 5450 "10000101" // /* MW 2 */
+ 5451 "11000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1188 50 first
+ 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5453 "10100000" // /* MW 11 */
+ 5454 "10011000" // /* MW 10 */
+ 5455 "00110011" // /* MW 9 */
+ 5456 "00000010" // /* MW 8 */
+ 5457 "01001011" // /* MW 7 */
+ 5458 "00001110" // /* MW 6 */
+ 5459 "00101011" // /* MW 5 */
+ 5460 "00101000" // /* MW 4 */
+ 5461 "01111000" // /* MW 3 */
+ 5462 "10000001" // /* MW 2 */
+ 5463 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+ 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5465 "01110000" // /* MW 11 */
+ 5466 "10000000" // /* MW 10 */
+ 5467 "11000110" // /* MW 9 */
+ 5468 "00000011" // /* MW 8 */
+ 5469 "01001011" // /* MW 7 */
+ 5470 "01011010" // /* MW 6 */
+ 5471 "00101111" // /* MW 5 */
+ 5472 "00101000" // /* MW 4 */
+ 5473 "01111000" // /* MW 3 */
+ 5474 "00111001" // /* MW 2 */
+ 5475 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1149 31 first
+ 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5477 "01110000" // /* MW 11 */
+ 5478 "00000000" // /* MW 10 */
+ 5479 "10000010" // /* MW 9 */
+ 5480 "00000001" // /* MW 8 */
+ 5481 "00001011" // /* MW 7 */
+ 5482 "01010011" // /* MW 6 */
+ 5483 "00101011" // /* MW 5 */
+ 5484 "00000011" // /* MW 4 */
+ 5485 "01110100" // /* MW 3 */
+ 5486 "00001101" // /* MW 2 */
+ 5487 "11011001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+ 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5489 "01011110" // /* MW 9 */
+ 5490 "00000000" // /* MW 8 */
+ 5491 "11000000" // /* MW 7 */
+ 5492 "00000001" // /* MW 6 */
+ 5493 "11010100" // /* MW 5 */
+ 5494 "00010010" // /* MW 4 */
+ 5495 "01110100" // /* MW 3 */
+ 5496 "01000001" // /* MW 2 */
+ 5497 "01110001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1152 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+ 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5499 "00010000" // /* MW 11 */
+ 5500 "01000000" // /* MW 10 */
+ 5501 "10111011" // /* MW 9 */
+ 5502 "00000101" // /* MW 8 */
+ 5503 "00000000" // /* MW 7 */
+ 5504 "00000000" // /* MW 6 */
+ 5505 "00101000" // /* MW 5 */
+ 5506 "00101000" // /* MW 4 */
+ 5507 "01111000" // /* MW 3 */
+ 5508 "10010101" // /* MW 2 */
+ 5509 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 1154 31 first
+.src_ref 2 "conv2d_bf16.h" 1206 8
+ 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5511 "00010000" // /* MW 11 */
+ 5512 "00101000" // /* MW 10 */
+ 5513 "01111011" // /* MW 9 */
+ 5514 "00000100" // /* MW 8 */
+ 5515 "00000000" // /* MW 7 */
+ 5516 "00000000" // /* MW 6 */
+ 5517 "00101000" // /* MW 5 */
+ 5518 "00101000" // /* MW 4 */
+ 5519 "01111000" // /* MW 3 */
+ 5520 "00011101" // /* MW 2 */
+ 5521 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+ 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5523 "00101000" // /* MW 5 */
+ 5524 "00000001" // /* MW 4 */
+ 5525 "01110100" // /* MW 3 */
+ 5526 "10110101" // /* MW 2 */
+ 5527 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1157 31 first
+ 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5529 "00101000" // /* MW 5 */
+ 5530 "00100010" // /* MW 4 */
+ 5531 "01111000" // /* MW 3 */
+ 5532 "10100101" // /* MW 2 */
+ 5533 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 1159 31 first
+ 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5535 "00101000" // /* MW 5 */
+ 5536 "00101000" // /* MW 4 */
+ 5537 "01111000" // /* MW 3 */
+ 5538 "00101101" // /* MW 2 */
+ 5539 "11011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id first
+ 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5541 "00101000" // /* MW 5 */
+ 5542 "00101000" // /* MW 4 */
+ 5543 "01111000" // /* MW 3 */
+ 5544 "10000001" // /* MW 2 */
+ 5545 "00100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 1192 29 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5547 "00101000" // /* MW 5 */
+ 5548 "00000001" // /* MW 4 */
+ 5549 "01110100" // /* MW 3 */
+ 5550 "10111101" // /* MW 2 */
+ 5551 "10000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5553 "11101110" // /* MW 9 */
+ 5554 "11000011" // /* MW 8 */
+ 5555 "10011010" // /* MW 7 */
+ 5556 "00000010" // /* MW 6 */
+ 5557 "00010100" // /* MW 5 */
+ 5558 "00010001" // /* MW 4 */
+ 5559 "01110100" // /* MW 3 */
+ 5560 "11001101" // /* MW 2 */
+ 5561 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1162 81
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5563 "11100000" // /* MW 11 */
+ 5564 "11000001" // /* MW 10 */
+ 5565 "10011010" // /* MW 9 */
+ 5566 "00000001" // /* MW 8 */
+ 5567 "10001011" // /* MW 7 */
+ 5568 "10011000" // /* MW 6 */
+ 5569 "00101100" // /* MW 5 */
+ 5570 "00101000" // /* MW 4 */
+ 5571 "01111000" // /* MW 3 */
+ 5572 "11000101" // /* MW 2 */
+ 5573 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5575 "11101001" // /* MW 9 */
+ 5576 "00010100" // /* MW 8 */
+ 5577 "01001000" // /* MW 7 */
+ 5578 "00011101" // /* MW 6 */
+ 5579 "01010100" // /* MW 5 */
+ 5580 "00000000" // /* MW 4 */
+ 5581 "01110011" // /* MW 3 */
+ 5582 "10000001" // /* MW 2 */
+ 5583 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5585 "11101001" // /* MW 13 */
+ 5586 "00101100" // /* MW 12 */
+ 5587 "01001001" // /* MW 11 */
+ 5588 "00000111" // /* MW 10 */
+ 5589 "01011000" // /* MW 9 */
+ 5590 "01011100" // /* MW 8 */
+ 5591 "00000000" // /* MW 7 */
+ 5592 "00000000" // /* MW 6 */
+ 5593 "10010110" // /* MW 5 */
+ 5594 "10010100" // /* MW 4 */
+ 5595 "01110110" // /* MW 3 */
+ 5596 "00110101" // /* MW 2 */
+ 5597 "11001111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1162 81 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5599 "00001001" // /* MW 13 */
+ 5600 "01010101" // /* MW 12 */
+ 5601 "01001010" // /* MW 11 */
+ 5602 "00111110" // /* MW 10 */
+ 5603 "10010000" // /* MW 9 */
+ 5604 "01001100" // /* MW 8 */
+ 5605 "00000000" // /* MW 7 */
+ 5606 "00000000" // /* MW 6 */
+ 5607 "10010110" // /* MW 5 */
+ 5608 "00111000" // /* MW 4 */
+ 5609 "01111010" // /* MW 3 */
+ 5610 "10111101" // /* MW 2 */
+ 5611 "10000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 1199 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5613 "00111101" // /* MW 13 */
+ 5614 "01100000" // /* MW 12 */
+ 5615 "11111000" // /* MW 11 */
+ 5616 "00011110" // /* MW 10 */
+ 5617 "10010000" // /* MW 9 */
+ 5618 "01010100" // /* MW 8 */
+ 5619 "00000000" // /* MW 7 */
+ 5620 "00000000" // /* MW 6 */
+ 5621 "10010110" // /* MW 5 */
+ 5622 "10011000" // /* MW 4 */
+ 5623 "01110100" // /* MW 3 */
+ 5624 "00000001" // /* MW 2 */
+ 5625 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 1200 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5627 "00111101" // /* MW 7 */
+ 5628 "01100100" // /* MW 6 */
+ 5629 "11111001" // /* MW 5 */
+ 5630 "00000100" // /* MW 4 */
+ 5631 "01110000" // /* MW 3 */
+ 5632 "10000001" // /* MW 2 */
+ 5633 "00100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1201 26 first
+.aggressive_scheduled_block_id 6
+.noswbrkpt
+ 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5635 "00111101" // /* MW 7 */
+ 5636 "10001000" // /* MW 6 */
+ 5637 "11111010" // /* MW 5 */
+ 5638 "00000100" // /* MW 4 */
+ 5639 "01110000" // /* MW 3 */
+ 5640 "00001001" // /* MW 2 */
+ 5641 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5643 "00001001" // /* MW 7 */
+ 5644 "01101101" // /* MW 6 */
+ 5645 "01001011" // /* MW 5 */
+ 5646 "00000100" // /* MW 4 */
+ 5647 "01110000" // /* MW 3 */
+ 5648 "00000001" // /* MW 2 */
+ 5649 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 5651 "00101000" // /* MW 5 */
+ 5652 "00000001" // /* MW 4 */
+ 5653 "01110100" // /* MW 3 */
+ 5654 "10000001" // /* MW 2 */
+ 5655 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5657 "00010100" // /* MW 3 */
+ 5658 "00010001" // /* MW 2 */
+ 5659 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 1202 26 first
+.src_ref 2 "conv2d_bf16.h" 1206 8 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5661 "00111101" // /* MW 11 */
+ 5662 "10001100" // /* MW 10 */
+ 5663 "11111011" // /* MW 9 */
+ 5664 "10000010" // /* MW 8 */
+ 5665 "01111101" // /* MW 7 */
+ 5666 "01110010" // /* MW 6 */
+ 5667 "00101101" // /* MW 5 */
+ 5668 "00101000" // /* MW 4 */
+ 5669 "01111000" // /* MW 3 */
+ 5670 "00001001" // /* MW 2 */
+ 5671 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5673 "00101001" // /* MW 9 */
+ 5674 "00000110" // /* MW 8 */
+ 5675 "10100000" // /* MW 7 */
+ 5676 "00011101" // /* MW 6 */
+ 5677 "00010100" // /* MW 5 */
+ 5678 "00010100" // /* MW 4 */
+ 5679 "01110100" // /* MW 3 */
+ 5680 "00000001" // /* MW 2 */
+ 5681 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5683 "00001001" // /* MW 13 */
+ 5684 "01000110" // /* MW 12 */
+ 5685 "10100010" // /* MW 11 */
+ 5686 "00001111" // /* MW 10 */
+ 5687 "10101010" // /* MW 9 */
+ 5688 "01011000" // /* MW 8 */
+ 5689 "00000000" // /* MW 7 */
+ 5690 "00000000" // /* MW 6 */
+ 5691 "00101000" // /* MW 5 */
+ 5692 "00000001" // /* MW 4 */
+ 5693 "01110100" // /* MW 3 */
+ 5694 "10000001" // /* MW 2 */
+ 5695 "00100010" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5697 "01010001" // /* MW 15 */
+ 5698 "00001001" // /* MW 14 */
+ 5699 "11101101" // /* MW 13 */
+ 5700 "00000011" // /* MW 12 */
+ 5701 "11001001" // /* MW 11 */
+ 5702 "00000000" // /* MW 10 */
+ 5703 "00000000" // /* MW 9 */
+ 5704 "00000000" // /* MW 8 */
+ 5705 "01011011" // /* MW 7 */
+ 5706 "00000001" // /* MW 6 */
+ 5707 "00101000" // /* MW 5 */
+ 5708 "00100010" // /* MW 4 */
+ 5709 "11111000" // /* MW 3 */
+ 5710 "00101100" // /* MW 2 */
+ 5711 "00000000" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248
+.src_ref 2 "conv2d_bf16.h" 736 8 first
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.begin_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5713 "01010000" // /* MW 15 */
+ 5714 "00011011" // /* MW 14 */
+ 5715 "11101101" // /* MW 13 */
+ 5716 "00000001" // /* MW 12 */
+ 5717 "01001001" // /* MW 11 */
+ 5718 "00000001" // /* MW 10 */
+ 5719 "00000000" // /* MW 9 */
+ 5720 "00000000" // /* MW 8 */
+ 5721 "01011011" // /* MW 7 */
+ 5722 "00000001" // /* MW 6 */
+ 5723 "00101000" // /* MW 5 */
+ 5724 "00101000" // /* MW 4 */
+ 5725 "01111000" // /* MW 3 */
+ 5726 "00001001" // /* MW 2 */
+ 5727 "10100000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 737 8 first
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5729 "00110001" // /* MW 15 */
+ 5730 "00000000" // /* MW 14 */
+ 5731 "01111101" // /* MW 13 */
+ 5732 "10100101" // /* MW 12 */
+ 5733 "00000001" // /* MW 11 */
+ 5734 "00000000" // /* MW 10 */
+ 5735 "00000000" // /* MW 9 */
+ 5736 "00000000" // /* MW 8 */
+ 5737 "01011011" // /* MW 7 */
+ 5738 "00000001" // /* MW 6 */
+ 5739 "00101000" // /* MW 5 */
+ 5740 "00101000" // /* MW 4 */
+ 5741 "01111000" // /* MW 3 */
+ 5742 "00000001" // /* MW 2 */
+ 5743 "01110001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 738 8 first
+.src_ref 2 "conv2d_bf16.h" 740 30 first
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5745 "00110000" // /* MW 15 */
+ 5746 "00010010" // /* MW 14 */
+ 5747 "01111101" // /* MW 13 */
+ 5748 "10100101" // /* MW 12 */
+ 5749 "00000001" // /* MW 11 */
+ 5750 "00000000" // /* MW 10 */
+ 5751 "00000000" // /* MW 9 */
+ 5752 "00000000" // /* MW 8 */
+ 5753 "01011011" // /* MW 7 */
+ 5754 "00000001" // /* MW 6 */
+ 5755 "00101000" // /* MW 5 */
+ 5756 "00000001" // /* MW 4 */
+ 5757 "01110100" // /* MW 3 */
+ 5758 "10000001" // /* MW 2 */
+ 5759 "00100010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.end_of_loop
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 5761 "01010001" // /* MW 15 */
+ 5762 "00001001" // /* MW 14 */
+ 5763 "11101101" // /* MW 13 */
+ 5764 "00000011" // /* MW 12 */
+ 5765 "11001001" // /* MW 11 */
+ 5766 "00000000" // /* MW 10 */
+ 5767 "00000000" // /* MW 9 */
+ 5768 "00000000" // /* MW 8 */
+ 5769 "01011011" // /* MW 7 */
+ 5770 "00000001" // /* MW 6 */
+ 5771 "00101000" // /* MW 5 */
+ 5772 "00100010" // /* MW 4 */
+ 5773 "11111000" // /* MW 3 */
+ 5774 "00101100" // /* MW 2 */
+ 5775 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 742 30 first
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5777 "00001001" // /* MW 13 */
+ 5778 "01101010" // /* MW 12 */
+ 5779 "10100011" // /* MW 11 */
+ 5780 "00011110" // /* MW 10 */
+ 5781 "10010000" // /* MW 9 */
+ 5782 "01010100" // /* MW 8 */
+ 5783 "00000000" // /* MW 7 */
+ 5784 "00000000" // /* MW 6 */
+ 5785 "10010110" // /* MW 5 */
+ 5786 "10111100" // /* MW 4 */
+ 5787 "01111100" // /* MW 3 */
+ 5788 "00001001" // /* MW 2 */
+ 5789 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 743 30 first
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 5791 "00101001" // /* MW 13 */
+ 5792 "00000110" // /* MW 12 */
+ 5793 "10100000" // /* MW 11 */
+ 5794 "00000111" // /* MW 10 */
+ 5795 "00111000" // /* MW 9 */
+ 5796 "01111100" // /* MW 8 */
+ 5797 "00000000" // /* MW 7 */
+ 5798 "00000000" // /* MW 6 */
+ 5799 "10010110" // /* MW 5 */
+ 5800 "00011100" // /* MW 4 */
+ 5801 "01111110" // /* MW 3 */
+ 5802 "00000001" // /* MW 2 */
+ 5803 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5805 "00001001" // /* MW 9 */
+ 5806 "01000110" // /* MW 8 */
+ 5807 "10100010" // /* MW 7 */
+ 5808 "11100100" // /* MW 6 */
+ 5809 "00000000" // /* MW 5 */
+ 5810 "01010101" // /* MW 4 */
+ 5811 "01100001" // /* MW 3 */
+ 5812 "10010001" // /* MW 2 */
+ 5813 "01100001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5815 "00101001" // /* MW 9 */
+ 5816 "00101010" // /* MW 8 */
+ 5817 "10100001" // /* MW 7 */
+ 5818 "11000100" // /* MW 6 */
+ 5819 "00000111" // /* MW 5 */
+ 5820 "10010010" // /* MW 4 */
+ 5821 "01100001" // /* MW 3 */
+ 5822 "11000001" // /* MW 2 */
+ 5823 "01101010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5825 "00001001" // /* MW 9 */
+ 5826 "01101010" // /* MW 8 */
+ 5827 "10100011" // /* MW 7 */
+ 5828 "11000100" // /* MW 6 */
+ 5829 "00000011" // /* MW 5 */
+ 5830 "10010010" // /* MW 4 */
+ 5831 "01100010" // /* MW 3 */
+ 5832 "10000001" // /* MW 2 */
+ 5833 "11101011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+.src_ref 2 "conv2d_bf16.h" 1285 32 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5835 "00101001" // /* MW 11 */
+ 5836 "00000110" // /* MW 10 */
+ 5837 "10100000" // /* MW 9 */
+ 5838 "11100110" // /* MW 8 */
+ 5839 "00000000" // /* MW 7 */
+ 5840 "10001111" // /* MW 6 */
+ 5841 "00100010" // /* MW 5 */
+ 5842 "01010111" // /* MW 4 */
+ 5843 "01101111" // /* MW 3 */
+ 5844 "10010001" // /* MW 2 */
+ 5845 "10110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5847 "00001001" // /* MW 9 */
+ 5848 "01000110" // /* MW 8 */
+ 5849 "10100010" // /* MW 7 */
+ 5850 "11100100" // /* MW 6 */
+ 5851 "00000000" // /* MW 5 */
+ 5852 "00000110" // /* MW 4 */
+ 5853 "01100010" // /* MW 3 */
+ 5854 "10010001" // /* MW 2 */
+ 5855 "10010011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 746 30 first
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+.aggressive_scheduled_block_id 6
+.nohwbrkpt
+.noswbrkpt
+ 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5857 "00101001" // /* MW 7 */
+ 5858 "00101010" // /* MW 6 */
+ 5859 "10100001" // /* MW 5 */
+ 5860 "11000110" // /* MW 4 */
+ 5861 "00000011" // /* MW 3 */
+ 5862 "10010010" // /* MW 2 */
+ 5863 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 745 30 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.aggressive_scheduled_block_id 6
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5865 "00001001" // /* MW 7 */
+ 5866 "01101010" // /* MW 6 */
+ 5867 "10100011" // /* MW 5 */
+ 5868 "11000110" // /* MW 4 */
+ 5869 "00000111" // /* MW 3 */
+ 5870 "10010010" // /* MW 2 */
+ 5871 "00000001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+ 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 5873 "00000000" // /* MW 3 */
+ 5874 "10001011" // /* MW 2 */
+ 5875 "00011111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 750 26 first
+ 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5877 "00101001" // /* MW 7 */
+ 5878 "00101010" // /* MW 6 */
+ 5879 "10100001" // /* MW 5 */
+ 5880 "11100110" // /* MW 4 */
+ 5881 "10100000" // /* MW 3 */
+ 5882 "00001011" // /* MW 2 */
+ 5883 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 749 26 first
+ 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5885 "00101001" // /* MW 7 */
+ 5886 "00000110" // /* MW 6 */
+ 5887 "10100000" // /* MW 5 */
+ 5888 "11100110" // /* MW 4 */
+ 5889 "10100000" // /* MW 3 */
+ 5890 "10001000" // /* MW 2 */
+ 5891 "00000011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 752 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+ 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5893 "00001001" // /* MW 9 */
+ 5894 "01101010" // /* MW 8 */
+ 5895 "10100011" // /* MW 7 */
+ 5896 "11100110" // /* MW 6 */
+ 5897 "00000000" // /* MW 5 */
+ 5898 "00000101" // /* MW 4 */
+ 5899 "00100011" // /* MW 3 */
+ 5900 "11110111" // /* MW 2 */
+ 5901 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 751 26 first
+.src_ref 2 "conv2d_bf16.h" 1286 32 first
+ 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 5903 "00001001" // /* MW 11 */
+ 5904 "01000110" // /* MW 10 */
+ 5905 "10100010" // /* MW 9 */
+ 5906 "11100110" // /* MW 8 */
+ 5907 "10100000" // /* MW 7 */
+ 5908 "10000010" // /* MW 6 */
+ 5909 "00100101" // /* MW 5 */
+ 5910 "11010111" // /* MW 4 */
+ 5911 "01101110" // /* MW 3 */
+ 5912 "10001001" // /* MW 2 */
+ 5913 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+ 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5915 "01110000" // /* MW 7 */
+ 5916 "10000000" // /* MW 6 */
+ 5917 "11000101" // /* MW 5 */
+ 5918 "00000011" // /* MW 4 */
+ 5919 "01100000" // /* MW 3 */
+ 5920 "10001001" // /* MW 2 */
+ 5921 "01100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5923 "01110000" // /* MW 7 */
+ 5924 "00000000" // /* MW 6 */
+ 5925 "10000001" // /* MW 5 */
+ 5926 "00000001" // /* MW 4 */
+ 5927 "01100000" // /* MW 3 */
+ 5928 "01000001" // /* MW 2 */
+ 5929 "10100000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5931 "01110000" // /* MW 7 */
+ 5932 "01010000" // /* MW 6 */
+ 5933 "10000111" // /* MW 5 */
+ 5934 "00000000" // /* MW 4 */
+ 5935 "11000000" // /* MW 3 */
+ 5936 "00010010" // /* MW 2 */
+ 5937 "10110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5939 "01110000" // /* MW 7 */
+ 5940 "10010000" // /* MW 6 */
+ 5941 "11000111" // /* MW 5 */
+ 5942 "00000010" // /* MW 4 */
+ 5943 "11000000" // /* MW 3 */
+ 5944 "00000010" // /* MW 2 */
+ 5945 "10100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 736 8
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5947 "01110110" // /* MW 9 */
+ 5948 "01100000" // /* MW 8 */
+ 5949 "11001000" // /* MW 7 */
+ 5950 "00000001" // /* MW 6 */
+ 5951 "10010000" // /* MW 5 */
+ 5952 "00111011" // /* MW 4 */
+ 5953 "01100001" // /* MW 3 */
+ 5954 "10010001" // /* MW 2 */
+ 5955 "00010011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1287 37
+ 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5957 "01110000" // /* MW 7 */
+ 5958 "00000000" // /* MW 6 */
+ 5959 "10000011" // /* MW 5 */
+ 5960 "00000000" // /* MW 4 */
+ 5961 "11000000" // /* MW 3 */
+ 5962 "00001010" // /* MW 2 */
+ 5963 "01100010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1218 20 first
+.src_ref 2 "conv2d_bf16.h" 1287 37 first
+ 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */
+ 5965 "01100000" // /* MW 11 */
+ 5966 "00000000" // /* MW 10 */
+ 5967 "00000000" // /* MW 9 */
+ 5968 "11111010" // /* MW 8 */
+ 5969 "00000010" // /* MW 7 */
+ 5970 "00100100" // /* MW 6 */
+ 5971 "00100000" // /* MW 5 */
+ 5972 "01010111" // /* MW 4 */
+ 5973 "11000000" // /* MW 3 */
+ 5974 "00100010" // /* MW 2 */
+ 5975 "01010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 738 8
+.delay_slot
+ 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5977 "01110000" // /* MW 7 */
+ 5978 "01100000" // /* MW 6 */
+ 5979 "10101001" // /* MW 5 */
+ 5980 "00000000" // /* MW 4 */
+ 5981 "11000000" // /* MW 3 */
+ 5982 "00011010" // /* MW 2 */
+ 5983 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 5985 "01110000" // /* MW 7 */
+ 5986 "11000000" // /* MW 6 */
+ 5987 "10100111" // /* MW 5 */
+ 5988 "00000011" // /* MW 4 */
+ 5989 "11000000" // /* MW 3 */
+ 5990 "00110010" // /* MW 2 */
+ 5991 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 5993 "01110110" // /* MW 9 */
+ 5994 "01100000" // /* MW 8 */
+ 5995 "10110101" // /* MW 7 */
+ 5996 "00000000" // /* MW 6 */
+ 5997 "10010000" // /* MW 5 */
+ 5998 "00101011" // /* MW 4 */
+ 5999 "11000101" // /* MW 3 */
+ 6000 "00111010" // /* MW 2 */
+ 6001 "00010010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 2 "conv2d_bf16.h" 1286 32
+.delay_slot
+ 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6003 "01110000" // /* MW 7 */
+ 6004 "10000000" // /* MW 6 */
+ 6005 "11000010" // /* MW 5 */
+ 6006 "00000010" // /* MW 4 */
+ 6007 "11000000" // /* MW 3 */
+ 6008 "00101010" // /* MW 2 */
+ 6009 "00100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 1187 40
+.delay_slot
+ 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6011 "01110000" // /* MW 7 */
+ 6012 "11000000" // /* MW 6 */
+ 6013 "01001101" // /* MW 5 */
+ 6014 "00000000" // /* MW 4 */
+ 6015 "01100000" // /* MW 3 */
+ 6016 "10001001" // /* MW 2 */
+ 6017 "11100001" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6019 "11101100" // /* MW 3 */
+ 6020 "11011100" // /* MW 2 */
+ 6021 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6023 "11101100" // /* MW 3 */
+ 6024 "10111100" // /* MW 2 */
+ 6025 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6027 "01110000" // /* MW 7 */
+ 6028 "01110110" // /* MW 6 */
+ 6029 "10101010" // /* MW 5 */
+ 6030 "00000010" // /* MW 4 */
+ 6031 "01100000" // /* MW 3 */
+ 6032 "01011010" // /* MW 2 */
+ 6033 "00111100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6035 "01110000" // /* MW 7 */
+ 6036 "01110110" // /* MW 6 */
+ 6037 "11011010" // /* MW 5 */
+ 6038 "00000001" // /* MW 4 */
+ 6039 "01100000" // /* MW 3 */
+ 6040 "10111010" // /* MW 2 */
+ 6041 "10100010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */
+ 6043 "00100001" // /* MW 9 */
+ 6044 "00000000" // /* MW 8 */
+ 6045 "00000000" // /* MW 7 */
+ 6046 "11111110" // /* MW 6 */
+ 6047 "00000010" // /* MW 5 */
+ 6048 "00000000" // /* MW 4 */
+ 6049 "01100000" // /* MW 3 */
+ 6050 "11010010" // /* MW 2 */
+ 6051 "00100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6053 "01110000" // /* MW 7 */
+ 6054 "01110110" // /* MW 6 */
+ 6055 "10100010" // /* MW 5 */
+ 6056 "00000010" // /* MW 4 */
+ 6057 "01100000" // /* MW 3 */
+ 6058 "10111010" // /* MW 2 */
+ 6059 "00100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6061 "11101100" // /* MW 3 */
+ 6062 "10001100" // /* MW 2 */
+ 6063 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6065 "01110000" // /* MW 7 */
+ 6066 "01110110" // /* MW 6 */
+ 6067 "10010110" // /* MW 5 */
+ 6068 "00000010" // /* MW 4 */
+ 6069 "01100000" // /* MW 3 */
+ 6070 "11010010" // /* MW 2 */
+ 6071 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6073 "01110000" // /* MW 7 */
+ 6074 "01110110" // /* MW 6 */
+ 6075 "10001010" // /* MW 5 */
+ 6076 "00000000" // /* MW 4 */
+ 6077 "01100000" // /* MW 3 */
+ 6078 "10111010" // /* MW 2 */
+ 6079 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6081 "00000000" // /* MW 15 */
+ 6082 "00000000" // /* MW 14 */
+ 6083 "01111000" // /* MW 13 */
+ 6084 "10100101" // /* MW 12 */
+ 6085 "00000001" // /* MW 11 */
+ 6086 "00000000" // /* MW 10 */
+ 6087 "00000000" // /* MW 9 */
+ 6088 "00000000" // /* MW 8 */
+ 6089 "10010011" // /* MW 7 */
+ 6090 "10100010" // /* MW 6 */
+ 6091 "00100100" // /* MW 5 */
+ 6092 "00000000" // /* MW 4 */
+ 6093 "11110000" // /* MW 3 */
+ 6094 "00101100" // /* MW 2 */
+ 6095 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632
+.src_ref 4 "vector.hpp" 1152 43
+ 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6097 "10100011" // /* MW 3 */
+ 6098 "11100000" // /* MW 2 */
+ 6099 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6101 "11100011" // /* MW 3 */
+ 6102 "00010100" // /* MW 2 */
+ 6103 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6105 "00100011" // /* MW 3 */
+ 6106 "00000100" // /* MW 2 */
+ 6107 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6109 "01100011" // /* MW 3 */
+ 6110 "00010100" // /* MW 2 */
+ 6111 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6113 "00010011" // /* MW 3 */
+ 6114 "00000110" // /* MW 2 */
+ 6115 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6117 "11100011" // /* MW 3 */
+ 6118 "00010101" // /* MW 2 */
+ 6119 "00001000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6121 "01110000" // /* MW 7 */
+ 6122 "10100101" // /* MW 6 */
+ 6123 "00000001" // /* MW 5 */
+ 6124 "00000000" // /* MW 4 */
+ 6125 "01100000" // /* MW 3 */
+ 6126 "00100100" // /* MW 2 */
+ 6127 "10010100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1143 12 first
+ 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6129 "01011000" // /* MW 11 */
+ 6130 "00000000" // /* MW 10 */
+ 6131 "01000000" // /* MW 9 */
+ 6132 "00000001" // /* MW 8 */
+ 6133 "00110101" // /* MW 7 */
+ 6134 "00000110" // /* MW 6 */
+ 6135 "00100000" // /* MW 5 */
+ 6136 "01010111" // /* MW 4 */
+ 6137 "01101111" // /* MW 3 */
+ 6138 "10010010" // /* MW 2 */
+ 6139 "11100010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.delay_slot
+ 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6141 "10000000" // /* MW 3 */
+ 6142 "01000100" // /* MW 2 */
+ 6143 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.delay_slot
+ 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6145 "10100000" // /* MW 3 */
+ 6146 "01001001" // /* MW 2 */
+ 6147 "00011010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.delay_slot
+ 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6149 "00000001" // /* MW 5 */
+ 6150 "00011110" // /* MW 4 */
+ 6151 "00000101" // /* MW 3 */
+ 6152 "01110010" // /* MW 2 */
+ 6153 "11101011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.delay_slot
+ 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6155 "10000000" // /* MW 3 */
+ 6156 "01001110" // /* MW 2 */
+ 6157 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6159 "00000000" // /* MW 1 */
+.loop_nesting 0
+ 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */
+ 6161 "00000000" // /* MW 5 */
+ 6162 "00000000" // /* MW 4 */
+ 6163 "01011000" // /* MW 3 */
+ 6164 "00001101" // /* MW 2 */
+ 6165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6167 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6169 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6171 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6173 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6175 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 1364 80
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6177 "01111000" // /* MW 11 */
+ 6178 "10010000" // /* MW 10 */
+ 6179 "10110011" // /* MW 9 */
+ 6180 "00001000" // /* MW 8 */
+ 6181 "11100001" // /* MW 7 */
+ 6182 "00000100" // /* MW 6 */
+ 6183 "10001011" // /* MW 5 */
+ 6184 "00001100" // /* MW 4 */
+ 6185 "00100010" // /* MW 3 */
+ 6186 "01111110" // /* MW 2 */
+ 6187 "11111011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1369 80
+ 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6189 "01111000" // /* MW 11 */
+ 6190 "01000000" // /* MW 10 */
+ 6191 "01100010" // /* MW 9 */
+ 6192 "00000011" // /* MW 8 */
+ 6193 "11010100" // /* MW 7 */
+ 6194 "00011011" // /* MW 6 */
+ 6195 "00001011" // /* MW 5 */
+ 6196 "01010110" // /* MW 4 */
+ 6197 "10000010" // /* MW 3 */
+ 6198 "10010000" // /* MW 2 */
+ 6199 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 807 26
+.src_ref 2 "conv2d_bf16.h" 808 26
+.src_ref 2 "conv2d_bf16.h" 809 26
+.src_ref 2 "conv2d_bf16.h" 810 26
+.src_ref 2 "conv2d_bf16.h" 1436 26
+.src_ref 2 "conv2d_bf16.h" 1437 26
+.src_ref 2 "conv2d_bf16.h" 1438 26
+.src_ref 2 "conv2d_bf16.h" 1439 26
+ 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6201 "01111000" // /* MW 9 */
+ 6202 "11010000" // /* MW 8 */
+ 6203 "00000101" // /* MW 7 */
+ 6204 "10001001" // /* MW 6 */
+ 6205 "00110001" // /* MW 5 */
+ 6206 "00011001" // /* MW 4 */
+ 6207 "00000000" // /* MW 3 */
+ 6208 "10010100" // /* MW 2 */
+ 6209 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 802 83
+.src_ref 2 "conv2d_bf16.h" 1428 39
+ 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6211 "01111000" // /* MW 11 */
+ 6212 "10010000" // /* MW 10 */
+ 6213 "11000111" // /* MW 9 */
+ 6214 "11001010" // /* MW 8 */
+ 6215 "00100000" // /* MW 7 */
+ 6216 "00000001" // /* MW 6 */
+ 6217 "00001011" // /* MW 5 */
+ 6218 "01011100" // /* MW 4 */
+ 6219 "10000110" // /* MW 3 */
+ 6220 "10011000" // /* MW 2 */
+ 6221 "11101111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 792 8
+.src_ref 2 "conv2d_bf16.h" 794 8
+ 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6223 "01111000" // /* MW 11 */
+ 6224 "01010000" // /* MW 10 */
+ 6225 "10000111" // /* MW 9 */
+ 6226 "00001000" // /* MW 8 */
+ 6227 "10010000" // /* MW 7 */
+ 6228 "00000001" // /* MW 6 */
+ 6229 "00001011" // /* MW 5 */
+ 6230 "00000010" // /* MW 4 */
+ 6231 "00100101" // /* MW 3 */
+ 6232 "10000011" // /* MW 2 */
+ 6233 "11111010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 794 8
+.src_ref 2 "conv2d_bf16.h" 1455 20
+ 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6235 "01111000" // /* MW 9 */
+ 6236 "01010000" // /* MW 8 */
+ 6237 "01000101" // /* MW 7 */
+ 6238 "00001011" // /* MW 6 */
+ 6239 "10000000" // /* MW 5 */
+ 6240 "00000001" // /* MW 4 */
+ 6241 "00100000" // /* MW 3 */
+ 6242 "11010110" // /* MW 2 */
+ 6243 "11111011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 12
+ 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6245 "00010000" // /* MW 9 */
+ 6246 "01011000" // /* MW 8 */
+ 6247 "00110100" // /* MW 7 */
+ 6248 "00000101" // /* MW 6 */
+ 6249 "00000000" // /* MW 5 */
+ 6250 "00000000" // /* MW 4 */
+ 6251 "00100000" // /* MW 3 */
+ 6252 "00110110" // /* MW 2 */
+ 6253 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80 first
+.src_ref 2 "conv2d_bf16.h" 1873
+ 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6255 "01110010" // /* MW 5 */
+ 6256 "11011111" // /* MW 4 */
+ 6257 "00100110" // /* MW 3 */
+ 6258 "10000111" // /* MW 2 */
+ 6259 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 80
+ 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6261 "11000110" // /* MW 3 */
+ 6262 "00011101" // /* MW 2 */
+ 6263 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 80 first
+ 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6265 "00000110" // /* MW 3 */
+ 6266 "10001010" // /* MW 2 */
+ 6267 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 799 87 first
+ 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6269 "10000110" // /* MW 3 */
+ 6270 "10011110" // /* MW 2 */
+ 6271 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 83 first
+ 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6273 "11010110" // /* MW 3 */
+ 6274 "00011110" // /* MW 2 */
+ 6275 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 83 first
+ 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6277 "11110110" // /* MW 3 */
+ 6278 "11001010" // /* MW 2 */
+ 6279 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1337 66 first
+ 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6281 "10110110" // /* MW 3 */
+ 6282 "00010111" // /* MW 2 */
+ 6283 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1443 71 first
+ 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6285 "10010110" // /* MW 3 */
+ 6286 "00000111" // /* MW 2 */
+ 6287 "00000011" // /* MW 1 */
+ 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6289 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1369 89
+ 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6291 "00000000" // /* MW 3 */
+ 6292 "10011000" // /* MW 2 */
+ 6293 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+.src_ref 2 "conv2d_bf16.h" 1518 37
+ 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6295 "00000000" // /* MW 3 */
+ 6296 "00000111" // /* MW 2 */
+ 6297 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89
+ 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6299 "00000000" // /* MW 3 */
+ 6300 "11011100" // /* MW 2 */
+ 6301 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1364 89 first
+ 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6303 "11100000" // /* MW 3 */
+ 6304 "00001111" // /* MW 2 */
+ 6305 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1369 89 first
+ 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6307 "11000000" // /* MW 5 */
+ 6308 "00011110" // /* MW 4 */
+ 6309 "11101110" // /* MW 3 */
+ 6310 "01111111" // /* MW 2 */
+ 6311 "11101111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+ 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6313 "01110000" // /* MW 7 */
+ 6314 "10010000" // /* MW 6 */
+ 6315 "11000111" // /* MW 5 */
+ 6316 "00000011" // /* MW 4 */
+ 6317 "01100000" // /* MW 3 */
+ 6318 "00101011" // /* MW 2 */
+ 6319 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1362 31 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+.loop_nesting 1
+ 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6321 "01100000" // /* MW 13 */
+ 6322 "10000001" // /* MW 12 */
+ 6323 "01110001" // /* MW 11 */
+ 6324 "00000010" // /* MW 10 */
+ 6325 "10010110" // /* MW 9 */
+ 6326 "10001111" // /* MW 8 */
+ 6327 "00000000" // /* MW 7 */
+ 6328 "00000000" // /* MW 6 */
+ 6329 "00101000" // /* MW 5 */
+ 6330 "00101000" // /* MW 4 */
+ 6331 "01111010" // /* MW 3 */
+ 6332 "10000101" // /* MW 2 */
+ 6333 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1364 31 first
+.src_ref 2 "conv2d_bf16.h" 1443 16
+ 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6335 "00010000" // /* MW 11 */
+ 6336 "11001000" // /* MW 10 */
+ 6337 "10111100" // /* MW 9 */
+ 6338 "00000101" // /* MW 8 */
+ 6339 "00000000" // /* MW 7 */
+ 6340 "00000000" // /* MW 6 */
+ 6341 "00101000" // /* MW 5 */
+ 6342 "00101000" // /* MW 4 */
+ 6343 "01111010" // /* MW 3 */
+ 6344 "00001101" // /* MW 2 */
+ 6345 "11001101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1428 39 first
+.src_ref 2 "conv2d_bf16.h" 1443 16 first
+ 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6347 "01001000" // /* MW 11 */
+ 6348 "00111111" // /* MW 10 */
+ 6349 "10111111" // /* MW 9 */
+ 6350 "01101110" // /* MW 8 */
+ 6351 "11101001" // /* MW 7 */
+ 6352 "00000101" // /* MW 6 */
+ 6353 "00101000" // /* MW 5 */
+ 6354 "00000101" // /* MW 4 */
+ 6355 "01110110" // /* MW 3 */
+ 6356 "10000001" // /* MW 2 */
+ 6357 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6359 "01111110" // /* MW 9 */
+ 6360 "10010000" // /* MW 8 */
+ 6361 "01000111" // /* MW 7 */
+ 6362 "00000001" // /* MW 6 */
+ 6363 "00010100" // /* MW 5 */
+ 6364 "00000001" // /* MW 4 */
+ 6365 "01110011" // /* MW 3 */
+ 6366 "01011001" // /* MW 2 */
+ 6367 "01010101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1367 31 first
+ 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6369 "00101000" // /* MW 5 */
+ 6370 "00000001" // /* MW 4 */
+ 6371 "01110110" // /* MW 3 */
+ 6372 "10010101" // /* MW 2 */
+ 6373 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 1369 31 first
+ 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6375 "10101000" // /* MW 5 */
+ 6376 "00100001" // /* MW 4 */
+ 6377 "01111010" // /* MW 3 */
+ 6378 "00011101" // /* MW 2 */
+ 6379 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 1372 31 first
+ 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6381 "00101000" // /* MW 5 */
+ 6382 "00101000" // /* MW 4 */
+ 6383 "01111010" // /* MW 3 */
+ 6384 "10100101" // /* MW 2 */
+ 6385 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 1374 31 first
+ 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6387 "00101000" // /* MW 5 */
+ 6388 "00101000" // /* MW 4 */
+ 6389 "01111010" // /* MW 3 */
+ 6390 "00101101" // /* MW 2 */
+ 6391 "11001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 1377 31 first
+ 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6393 "10101000" // /* MW 5 */
+ 6394 "00000000" // /* MW 4 */
+ 6395 "01110110" // /* MW 3 */
+ 6396 "10110101" // /* MW 2 */
+ 6397 "11000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1379 31 first
+ 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6399 "00101000" // /* MW 5 */
+ 6400 "00000011" // /* MW 4 */
+ 6401 "01110110" // /* MW 3 */
+ 6402 "00111101" // /* MW 2 */
+ 6403 "11011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 578 27 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50 first
+ 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6405 "10101000" // /* MW 5 */
+ 6406 "00000011" // /* MW 4 */
+ 6407 "01110110" // /* MW 3 */
+ 6408 "01000101" // /* MW 2 */
+ 6409 "01101000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 578 27
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.src_ref 2 "conv2d_bf16.h" 1429 50
+ 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6411 "11101110" // /* MW 9 */
+ 6412 "00101101" // /* MW 8 */
+ 6413 "01101001" // /* MW 7 */
+ 6414 "00000001" // /* MW 6 */
+ 6415 "00010100" // /* MW 5 */
+ 6416 "00010010" // /* MW 4 */
+ 6417 "01110101" // /* MW 3 */
+ 6418 "01001101" // /* MW 2 */
+ 6419 "01101000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6421 "11101110" // /* MW 9 */
+ 6422 "00101111" // /* MW 8 */
+ 6423 "10101001" // /* MW 7 */
+ 6424 "00000010" // /* MW 6 */
+ 6425 "00010100" // /* MW 5 */
+ 6426 "00010100" // /* MW 4 */
+ 6427 "01110101" // /* MW 3 */
+ 6428 "10000001" // /* MW 2 */
+ 6429 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6431 "01101001" // /* MW 11 */
+ 6432 "00001011" // /* MW 10 */
+ 6433 "01001000" // /* MW 9 */
+ 6434 "11000010" // /* MW 8 */
+ 6435 "11011011" // /* MW 7 */
+ 6436 "00010001" // /* MW 6 */
+ 6437 "00101010" // /* MW 5 */
+ 6438 "00101000" // /* MW 4 */
+ 6439 "01111010" // /* MW 3 */
+ 6440 "00000001" // /* MW 2 */
+ 6441 "01010101" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6443 "01101001" // /* MW 9 */
+ 6444 "00110101" // /* MW 8 */
+ 6445 "01001001" // /* MW 7 */
+ 6446 "11000010" // /* MW 6 */
+ 6447 "11011111" // /* MW 5 */
+ 6448 "00010001" // /* MW 4 */
+ 6449 "01110101" // /* MW 3 */
+ 6450 "10000001" // /* MW 2 */
+ 6451 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6453 "01101001" // /* MW 3 */
+ 6454 "01001001" // /* MW 2 */
+ 6455 "01001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id first
+ 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6457 "01101001" // /* MW 3 */
+ 6458 "01110101" // /* MW 2 */
+ 6459 "01001011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.src_ref 2 "conv2d_bf16.h" 1437 26 first
+.aggressive_scheduled_block_id 7
+.noswbrkpt
+ 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6461 "00111101" // /* MW 9 */
+ 6462 "10000100" // /* MW 8 */
+ 6463 "10100001" // /* MW 7 */
+ 6464 "11000110" // /* MW 6 */
+ 6465 "01011111" // /* MW 5 */
+ 6466 "10001011" // /* MW 4 */
+ 6467 "10101010" // /* MW 3 */
+ 6468 "00000000" // /* MW 2 */
+ 6469 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 1436 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6471 "00111101" // /* MW 7 */
+ 6472 "10000000" // /* MW 6 */
+ 6473 "10100000" // /* MW 5 */
+ 6474 "00000000" // /* MW 4 */
+ 6475 "10010100" // /* MW 3 */
+ 6476 "00000001" // /* MW 2 */
+ 6477 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 1438 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6479 "00111101" // /* MW 7 */
+ 6480 "10001000" // /* MW 6 */
+ 6481 "10100010" // /* MW 5 */
+ 6482 "00000000" // /* MW 4 */
+ 6483 "11010100" // /* MW 3 */
+ 6484 "00000001" // /* MW 2 */
+ 6485 "00000011" // /* MW 1 */
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 1439 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6487 "00111101" // /* MW 9 */
+ 6488 "10001100" // /* MW 8 */
+ 6489 "10100011" // /* MW 7 */
+ 6490 "00011101" // /* MW 6 */
+ 6491 "00010100" // /* MW 5 */
+ 6492 "00010010" // /* MW 4 */
+ 6493 "01110101" // /* MW 3 */
+ 6494 "00000001" // /* MW 2 */
+ 6495 "01010101" // /* MW 1 */
+.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032
+.src_ref 2 "conv2d_bf16.h" 792 8 first
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.begin_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 2
+ 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6497 "10110111" // /* MW 5 */
+ 6498 "00010110" // /* MW 4 */
+ 6499 "10000010" // /* MW 3 */
+ 6500 "10000010" // /* MW 2 */
+ 6501 "10100010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 793 8 first
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6503 "00001001" // /* MW 9 */
+ 6504 "00101010" // /* MW 8 */
+ 6505 "10011001" // /* MW 7 */
+ 6506 "11000110" // /* MW 6 */
+ 6507 "01011111" // /* MW 5 */
+ 6508 "00111100" // /* MW 4 */
+ 6509 "00101010" // /* MW 3 */
+ 6510 "00101000" // /* MW 2 */
+ 6511 "00001010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 795 30 first
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6513 "00001001" // /* MW 9 */
+ 6514 "00000100" // /* MW 8 */
+ 6515 "10011000" // /* MW 7 */
+ 6516 "11000110" // /* MW 6 */
+ 6517 "01011011" // /* MW 5 */
+ 6518 "10111100" // /* MW 4 */
+ 6519 "10101001" // /* MW 3 */
+ 6520 "00000000" // /* MW 2 */
+ 6521 "00000110" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 796 30 first
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6523 "00001001" // /* MW 7 */
+ 6524 "01101000" // /* MW 6 */
+ 6525 "10011011" // /* MW 5 */
+ 6526 "00000000" // /* MW 4 */
+ 6527 "10010100" // /* MW 3 */
+ 6528 "00000001" // /* MW 2 */
+ 6529 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 794 8 first
+.src_ref 2 "conv2d_bf16.h" 797 30 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 6531 "00001001" // /* MW 13 */
+ 6532 "01000110" // /* MW 12 */
+ 6533 "10011010" // /* MW 11 */
+ 6534 "01101100" // /* MW 10 */
+ 6535 "00000101" // /* MW 9 */
+ 6536 "00000000" // /* MW 8 */
+ 6537 "00000000" // /* MW 7 */
+ 6538 "00000000" // /* MW 6 */
+ 6539 "10101000" // /* MW 5 */
+ 6540 "00000011" // /* MW 4 */
+ 6541 "01110110" // /* MW 3 */
+ 6542 "10000001" // /* MW 2 */
+ 6543 "00000010" // /* MW 1 */
+.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080
+.src_ref 3 "kernel_helpers.h" 978 11 first
+.src_ref 2 "conv2d_bf16.h" 799 30 first
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+.end_of_loop
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6545 "00000000" // /* MW 15 */
+ 6546 "00000000" // /* MW 14 */
+ 6547 "11101000" // /* MW 13 */
+ 6548 "10101111" // /* MW 12 */
+ 6549 "01000101" // /* MW 11 */
+ 6550 "00000001" // /* MW 10 */
+ 6551 "00000000" // /* MW 9 */
+ 6552 "00000000" // /* MW 8 */
+ 6553 "01011011" // /* MW 7 */
+ 6554 "00000001" // /* MW 6 */
+ 6555 "00101000" // /* MW 5 */
+ 6556 "00100100" // /* MW 4 */
+ 6557 "01111010" // /* MW 3 */
+ 6558 "00000001" // /* MW 2 */
+ 6559 "01010101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6561 "11100000" // /* MW 11 */
+ 6562 "10101101" // /* MW 10 */
+ 6563 "10000101" // /* MW 9 */
+ 6564 "00000000" // /* MW 8 */
+ 6565 "10001011" // /* MW 7 */
+ 6566 "10011100" // /* MW 6 */
+ 6567 "00100101" // /* MW 5 */
+ 6568 "10010111" // /* MW 4 */
+ 6569 "11111111" // /* MW 3 */
+ 6570 "00001100" // /* MW 2 */
+ 6571 "00000111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+.src_ref 2 "conv2d_bf16.h" 1517 32 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6573 "00001001" // /* MW 11 */
+ 6574 "00101010" // /* MW 10 */
+ 6575 "10011001" // /* MW 9 */
+ 6576 "11000110" // /* MW 8 */
+ 6577 "01011111" // /* MW 7 */
+ 6578 "00111100" // /* MW 6 */
+ 6579 "00100010" // /* MW 5 */
+ 6580 "00010111" // /* MW 4 */
+ 6581 "01101111" // /* MW 3 */
+ 6582 "10010001" // /* MW 2 */
+ 6583 "10010011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+.src_ref 2 "conv2d_bf16.h" 1518 37 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6585 "00001001" // /* MW 11 */
+ 6586 "00000100" // /* MW 10 */
+ 6587 "10011000" // /* MW 9 */
+ 6588 "11000110" // /* MW 8 */
+ 6589 "01011011" // /* MW 7 */
+ 6590 "10111100" // /* MW 6 */
+ 6591 "00100001" // /* MW 5 */
+ 6592 "10010111" // /* MW 4 */
+ 6593 "01101111" // /* MW 3 */
+ 6594 "10010001" // /* MW 2 */
+ 6595 "01110011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+.aggressive_scheduled_block_id 7
+.nohwbrkpt
+.noswbrkpt
+ 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6597 "00001001" // /* MW 7 */
+ 6598 "01101000" // /* MW 6 */
+ 6599 "10011011" // /* MW 5 */
+ 6600 "11100110" // /* MW 4 */
+ 6601 "10100000" // /* MW 3 */
+ 6602 "10001000" // /* MW 2 */
+ 6603 "00000010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 143 15 first
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+.src_ref 2 "conv2d_bf16.h" 1428 39
+.aggressive_scheduled_block_id 7
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6605 "00001001" // /* MW 9 */
+ 6606 "01000110" // /* MW 8 */
+ 6607 "10011010" // /* MW 7 */
+ 6608 "11100110" // /* MW 6 */
+ 6609 "10000000" // /* MW 5 */
+ 6610 "10011011" // /* MW 4 */
+ 6611 "00100000" // /* MW 3 */
+ 6612 "10110111" // /* MW 2 */
+ 6613 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 801 30 first
+ 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6615 "01011011" // /* MW 3 */
+ 6616 "00001011" // /* MW 2 */
+ 6617 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 802 30 first
+ 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6619 "01011111" // /* MW 3 */
+ 6620 "10001011" // /* MW 2 */
+ 6621 "00011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 803 30 first
+.src_ref 2 "conv2d_bf16.h" 807 26 first
+ 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6623 "00001001" // /* MW 7 */
+ 6624 "00000100" // /* MW 6 */
+ 6625 "10011000" // /* MW 5 */
+ 6626 "11000110" // /* MW 4 */
+ 6627 "01011011" // /* MW 3 */
+ 6628 "10111100" // /* MW 2 */
+ 6629 "00000001" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 804 30 first
+.src_ref 2 "conv2d_bf16.h" 808 26 first
+ 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6631 "00001001" // /* MW 7 */
+ 6632 "00101010" // /* MW 6 */
+ 6633 "10011001" // /* MW 5 */
+ 6634 "11000110" // /* MW 4 */
+ 6635 "01011111" // /* MW 3 */
+ 6636 "00111100" // /* MW 2 */
+ 6637 "00000010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 809 26 first
+ 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6639 "00001001" // /* MW 3 */
+ 6640 "01000110" // /* MW 2 */
+ 6641 "10011010" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 810 26 first
+ 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6643 "00001001" // /* MW 3 */
+ 6644 "01101000" // /* MW 2 */
+ 6645 "10011011" // /* MW 1 */
+ 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6647 "00000000" // /* MW 1 */
+ 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6649 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+ 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6651 "00010110" // /* MW 3 */
+ 6652 "00010000" // /* MW 2 */
+ 6653 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+ 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6655 "10010110" // /* MW 3 */
+ 6656 "10010000" // /* MW 2 */
+ 6657 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_bf16.h" 1455 20 first
+ 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */
+ 6659 "01100001" // /* MW 9 */
+ 6660 "00000000" // /* MW 8 */
+ 6661 "00000000" // /* MW 7 */
+ 6662 "01001110" // /* MW 6 */
+ 6663 "00000011" // /* MW 5 */
+ 6664 "00101010" // /* MW 4 */
+ 6665 "11000000" // /* MW 3 */
+ 6666 "00011010" // /* MW 2 */
+ 6667 "00010010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.delay_slot
+ 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6669 "01010110" // /* MW 3 */
+ 6670 "00010000" // /* MW 2 */
+ 6671 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6673 "10010110" // /* MW 3 */
+ 6674 "00010001" // /* MW 2 */
+ 6675 "00001001" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6677 "11010110" // /* MW 3 */
+ 6678 "10010001" // /* MW 2 */
+ 6679 "00001011" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6681 "00010110" // /* MW 3 */
+ 6682 "10010001" // /* MW 2 */
+ 6683 "00001010" // /* MW 1 */
+.src_ref 5 "accum.hpp" 149 115
+.src_ref 5 "accum.hpp" 1110 102
+.delay_slot
+ 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6685 "01010110" // /* MW 3 */
+ 6686 "00010001" // /* MW 2 */
+ 6687 "00001100" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6689 "11101100" // /* MW 3 */
+ 6690 "11011100" // /* MW 2 */
+ 6691 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+ 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6693 "11101100" // /* MW 3 */
+ 6694 "10001100" // /* MW 2 */
+ 6695 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+ 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6697 "01110000" // /* MW 7 */
+ 6698 "01110110" // /* MW 6 */
+ 6699 "10101010" // /* MW 5 */
+ 6700 "00000010" // /* MW 4 */
+ 6701 "01100000" // /* MW 3 */
+ 6702 "01011010" // /* MW 2 */
+ 6703 "10101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+ 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6705 "01110000" // /* MW 7 */
+ 6706 "01110110" // /* MW 6 */
+ 6707 "01011010" // /* MW 5 */
+ 6708 "00000000" // /* MW 4 */
+ 6709 "01100000" // /* MW 3 */
+ 6710 "10001010" // /* MW 2 */
+ 6711 "10000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+ 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */
+ 6713 "00100001" // /* MW 9 */
+ 6714 "00000000" // /* MW 8 */
+ 6715 "00000000" // /* MW 7 */
+ 6716 "01010010" // /* MW 6 */
+ 6717 "00000011" // /* MW 5 */
+ 6718 "00000000" // /* MW 4 */
+ 6719 "01100000" // /* MW 3 */
+ 6720 "11010010" // /* MW 2 */
+ 6721 "10100000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6723 "01110000" // /* MW 7 */
+ 6724 "01110110" // /* MW 6 */
+ 6725 "10001010" // /* MW 5 */
+ 6726 "00000010" // /* MW 4 */
+ 6727 "01100000" // /* MW 3 */
+ 6728 "10001010" // /* MW 2 */
+ 6729 "10100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6731 "11101100" // /* MW 3 */
+ 6732 "10111100" // /* MW 2 */
+ 6733 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.src_ref 4 "max_min.hpp" 20 104
+.delay_slot
+ 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6735 "01110000" // /* MW 7 */
+ 6736 "01110110" // /* MW 6 */
+ 6737 "10010110" // /* MW 5 */
+ 6738 "00000010" // /* MW 4 */
+ 6739 "01100000" // /* MW 3 */
+ 6740 "01010010" // /* MW 2 */
+ 6741 "01101100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6743 "01110010" // /* MW 9 */
+ 6744 "01110110" // /* MW 8 */
+ 6745 "00100010" // /* MW 7 */
+ 6746 "00000010" // /* MW 6 */
+ 6747 "01010011" // /* MW 5 */
+ 6748 "00010100" // /* MW 4 */
+ 6749 "11110111" // /* MW 3 */
+ 6750 "00101100" // /* MW 2 */
+ 6751 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43 first
+.delay_slot
+ 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6753 "00000000" // /* MW 15 */
+ 6754 "00000000" // /* MW 14 */
+ 6755 "01111000" // /* MW 13 */
+ 6756 "10100101" // /* MW 12 */
+ 6757 "00000001" // /* MW 11 */
+ 6758 "00000000" // /* MW 10 */
+ 6759 "00000000" // /* MW 9 */
+ 6760 "00000000" // /* MW 8 */
+ 6761 "10010011" // /* MW 7 */
+ 6762 "11100010" // /* MW 6 */
+ 6763 "00100100" // /* MW 5 */
+ 6764 "00000000" // /* MW 4 */
+ 6765 "11110000" // /* MW 3 */
+ 6766 "00101100" // /* MW 2 */
+ 6767 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304
+.src_ref 4 "vector.hpp" 1152 43
+ 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6769 "10100011" // /* MW 3 */
+ 6770 "01100000" // /* MW 2 */
+ 6771 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6773 "11100011" // /* MW 3 */
+ 6774 "00010100" // /* MW 2 */
+ 6775 "00001100" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6777 "00100011" // /* MW 3 */
+ 6778 "00000100" // /* MW 2 */
+ 6779 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6781 "01100011" // /* MW 3 */
+ 6782 "00010100" // /* MW 2 */
+ 6783 "00001101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6785 "10100011" // /* MW 3 */
+ 6786 "01100001" // /* MW 2 */
+ 6787 "00001011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6789 "11100011" // /* MW 3 */
+ 6790 "00010101" // /* MW 2 */
+ 6791 "00001111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1152 43
+ 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6793 "01110000" // /* MW 7 */
+ 6794 "10100101" // /* MW 6 */
+ 6795 "00000001" // /* MW 5 */
+ 6796 "00000000" // /* MW 4 */
+ 6797 "01100000" // /* MW 3 */
+ 6798 "00100100" // /* MW 2 */
+ 6799 "10011100" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336
+.src_ref 4 "vector.hpp" 1152 43
+.src_ref 2 "conv2d_bf16.h" 1337 12 first
+ 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 6801 "01000000" // /* MW 5 */
+ 6802 "11110101" // /* MW 4 */
+ 6803 "01101110" // /* MW 3 */
+ 6804 "11000010" // /* MW 2 */
+ 6805 "01100010" // /* MW 1 */
+.delay_slot
+ 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6807 "10010000" // /* MW 3 */
+ 6808 "10001011" // /* MW 2 */
+ 6809 "00111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6811 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6813 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6815 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 6817 "00000000" // /* MW 15 */
+ 6818 "00000000" // /* MW 14 */
+ 6819 "01111000" // /* MW 13 */
+ 6820 "10100101" // /* MW 12 */
+ 6821 "00000001" // /* MW 11 */
+ 6822 "00000000" // /* MW 10 */
+ 6823 "00000000" // /* MW 9 */
+ 6824 "00000000" // /* MW 8 */
+ 6825 "01011011" // /* MW 7 */
+ 6826 "00000001" // /* MW 6 */
+ 6827 "00100000" // /* MW 5 */
+ 6828 "00000000" // /* MW 4 */
+ 6829 "11110000" // /* MW 3 */
+ 6830 "00101100" // /* MW 2 */
+ 6831 "00000000" // /* MW 1 */
+.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368
+.loop_nesting 0
+ 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6833 "11110001" // /* MW 3 */
+ 6834 "11101101" // /* MW 2 */
+ 6835 "00000111" // /* MW 1 */
+ 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6837 "10010001" // /* MW 3 */
+ 6838 "11110001" // /* MW 2 */
+ 6839 "00000111" // /* MW 1 */
+ 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6841 "00110001" // /* MW 3 */
+ 6842 "11110101" // /* MW 2 */
+ 6843 "00000111" // /* MW 1 */
+ 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6845 "00011001" // /* MW 3 */
+ 6846 "11101011" // /* MW 2 */
+ 6847 "00000111" // /* MW 1 */
+ 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6849 "10011001" // /* MW 3 */
+ 6850 "11111011" // /* MW 2 */
+ 6851 "00000111" // /* MW 1 */
+ 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6853 "11010001" // /* MW 3 */
+ 6854 "11111101" // /* MW 2 */
+ 6855 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873 first
+ 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 6857 "00000000" // /* MW 3 */
+ 6858 "00101000" // /* MW 2 */
+ 6859 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_bf16.h" 1873
+.delay_slot
+ 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6861 "00000001" // /* MW 5 */
+ 6862 "00000000" // /* MW 4 */
+ 6863 "00000000" // /* MW 3 */
+ 6864 "11110000" // /* MW 2 */
+ 6865 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6871 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end
+.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0
+ 6873 "00000000" // /* MW 1 */
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 74 first
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 81 4
+.function_start
+ 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6881 "00010000" // /* MW 9 */
+ 6882 "00100000" // /* MW 8 */
+ 6883 "00110010" // /* MW 7 */
+ 6884 "11110010" // /* MW 6 */
+ 6885 "00000001" // /* MW 5 */
+ 6886 "00000000" // /* MW 4 */
+ 6887 "00000000" // /* MW 3 */
+ 6888 "00100000" // /* MW 2 */
+ 6889 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6 first
+.src_ref 7 "superkernels.cpp" 81 4
+ 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6891 "01111000" // /* MW 9 */
+ 6892 "11010000" // /* MW 8 */
+ 6893 "01001011" // /* MW 7 */
+ 6894 "00001000" // /* MW 6 */
+ 6895 "00010000" // /* MW 5 */
+ 6896 "00000000" // /* MW 4 */
+ 6897 "11010000" // /* MW 3 */
+ 6898 "11000010" // /* MW 2 */
+ 6899 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 74
+ 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6901 "00000001" // /* MW 5 */
+ 6902 "00000000" // /* MW 4 */
+ 6903 "00000000" // /* MW 3 */
+ 6904 "00001000" // /* MW 2 */
+ 6905 "00000000" // /* MW 1 */
+ 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6907 "01010101" // /* MW 3 */
+ 6908 "11110000" // /* MW 2 */
+ 6909 "00001111" // /* MW 1 */
+ 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6911 "00000000" // /* MW 1 */
+ 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6913 "00000000" // /* MW 1 */
+ 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6915 "00000000" // /* MW 1 */
+ 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 79 6
+.src_ref 7 "superkernels.cpp" 79 16
+ 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */
+ 6919 "00000001" // /* MW 5 */
+ 6920 "01000000" // /* MW 4 */
+ 6921 "11011000" // /* MW 3 */
+ 6922 "00001101" // /* MW 2 */
+ 6923 "10000000" // /* MW 1 */
+.delay_slot
+ 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6925 "10011101" // /* MW 3 */
+ 6926 "11111011" // /* MW 2 */
+ 6927 "00001111" // /* MW 1 */
+.delay_slot
+ 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6929 "00011101" // /* MW 3 */
+ 6930 "11111111" // /* MW 2 */
+ 6931 "00001111" // /* MW 1 */
+.delay_slot
+ 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6933 "10011101" // /* MW 3 */
+ 6934 "11101101" // /* MW 2 */
+ 6935 "00001111" // /* MW 1 */
+.delay_slot
+ 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6937 "00111101" // /* MW 3 */
+ 6938 "11110100" // /* MW 2 */
+ 6939 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 6941 "00000000" // /* MW 5 */
+ 6942 "10101100" // /* MW 4 */
+ 6943 "11000111" // /* MW 3 */
+ 6944 "00000111" // /* MW 2 */
+ 6945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 6947 "00010001" // /* MW 9 */
+ 6948 "00110100" // /* MW 8 */
+ 6949 "10110010" // /* MW 7 */
+ 6950 "11110011" // /* MW 6 */
+ 6951 "00000001" // /* MW 5 */
+ 6952 "00000000" // /* MW 4 */
+ 6953 "01100000" // /* MW 3 */
+ 6954 "10010001" // /* MW 2 */
+ 6955 "11010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6957 "00010000" // /* MW 11 */
+ 6958 "00110010" // /* MW 10 */
+ 6959 "10110010" // /* MW 9 */
+ 6960 "11110011" // /* MW 8 */
+ 6961 "00000001" // /* MW 7 */
+ 6962 "00000000" // /* MW 6 */
+ 6963 "00001011" // /* MW 5 */
+ 6964 "10001111" // /* MW 4 */
+ 6965 "11100001" // /* MW 3 */
+ 6966 "11000000" // /* MW 2 */
+ 6967 "11100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6969 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6971 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 6973 "00000001" // /* MW 5 */
+ 6974 "00000000" // /* MW 4 */
+ 6975 "01100000" // /* MW 3 */
+ 6976 "00000101" // /* MW 2 */
+ 6977 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 6979 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6981 "00110001" // /* MW 3 */
+ 6982 "00100000" // /* MW 2 */
+ 6983 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 6985 "00000101" // /* MW 3 */
+ 6986 "00100000" // /* MW 2 */
+ 6987 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 6989 "01110000" // /* MW 7 */
+ 6990 "01100000" // /* MW 6 */
+ 6991 "10110000" // /* MW 5 */
+ 6992 "00000011" // /* MW 4 */
+ 6993 "00110000" // /* MW 3 */
+ 6994 "11000010" // /* MW 2 */
+ 6995 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.delay_slot
+ 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 6997 "01110000" // /* MW 11 */
+ 6998 "01100000" // /* MW 10 */
+ 6999 "00110010" // /* MW 9 */
+ 7000 "00000000" // /* MW 8 */
+ 7001 "01011011" // /* MW 7 */
+ 7002 "00000001" // /* MW 6 */
+ 7003 "00100000" // /* MW 5 */
+ 7004 "00000000" // /* MW 4 */
+ 7005 "11110000" // /* MW 3 */
+ 7006 "00101100" // /* MW 2 */
+ 7007 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 81 4
+.return_address
+ 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7009 "10000101" // /* MW 3 */
+ 7010 "01100111" // /* MW 2 */
+ 7011 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19
+.src_ref 7 "superkernels.cpp" 87 35 first
+ 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7013 "00010000" // /* MW 9 */
+ 7014 "00100010" // /* MW 8 */
+ 7015 "10110010" // /* MW 7 */
+ 7016 "11110000" // /* MW 6 */
+ 7017 "00000001" // /* MW 5 */
+ 7018 "00000000" // /* MW 4 */
+ 7019 "01010000" // /* MW 3 */
+ 7020 "11000001" // /* MW 2 */
+ 7021 "01001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 37 first
+.src_ref 7 "superkernels.cpp" 89 13
+ 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7023 "00010000" // /* MW 9 */
+ 7024 "00110000" // /* MW 8 */
+ 7025 "00110010" // /* MW 7 */
+ 7026 "11110000" // /* MW 6 */
+ 7027 "00000001" // /* MW 5 */
+ 7028 "00000000" // /* MW 4 */
+ 7029 "01010000" // /* MW 3 */
+ 7030 "11001111" // /* MW 2 */
+ 7031 "01000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 73
+ 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7033 "00111010" // /* MW 3 */
+ 7034 "00000110" // /* MW 2 */
+ 7035 "00000010" // /* MW 1 */
+ 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7037 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 110
+ 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7039 "01011010" // /* MW 3 */
+ 7040 "00010110" // /* MW 2 */
+ 7041 "00000010" // /* MW 1 */
+ 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7043 "00000000" // /* MW 1 */
+ 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7045 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 87 19 first
+.src_ref 7 "superkernels.cpp" 113 2
+ 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7047 "01110000" // /* MW 7 */
+ 7048 "01100000" // /* MW 6 */
+ 7049 "10110110" // /* MW 5 */
+ 7050 "00000000" // /* MW 4 */
+ 7051 "00110000" // /* MW 3 */
+ 7052 "11000010" // /* MW 2 */
+ 7053 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 57 first
+ 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7055 "00001111" // /* MW 3 */
+ 7056 "11100001" // /* MW 2 */
+ 7057 "00010100" // /* MW 1 */
+ 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7059 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 88 94
+ 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7061 "00001111" // /* MW 3 */
+ 7062 "01100001" // /* MW 2 */
+ 7063 "00010100" // /* MW 1 */
+ 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 28 first
+ 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7067 "00001111" // /* MW 3 */
+ 7068 "10100001" // /* MW 2 */
+ 7069 "00010100" // /* MW 1 */
+ 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7071 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 89 13
+.src_ref 7 "superkernels.cpp" 113 2
+ 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 7073 "00000000" // /* MW 15 */
+ 7074 "00000000" // /* MW 14 */
+ 7075 "01111000" // /* MW 13 */
+ 7076 "01100000" // /* MW 12 */
+ 7077 "00110111" // /* MW 11 */
+ 7078 "00000000" // /* MW 10 */
+ 7079 "00000000" // /* MW 9 */
+ 7080 "10000000" // /* MW 8 */
+ 7081 "00010001" // /* MW 7 */
+ 7082 "00000110" // /* MW 6 */
+ 7083 "00100000" // /* MW 5 */
+ 7084 "00000000" // /* MW 4 */
+ 7085 "11110000" // /* MW 3 */
+ 7086 "00101100" // /* MW 2 */
+ 7087 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 106 12
+.src_ref 7 "superkernels.cpp" 113 2
+.src_ref 7 "superkernels.cpp" 117 6
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 1 "io_buffer_main.h" 218 49
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7089 "00010000" // /* MW 9 */
+ 7090 "00100100" // /* MW 8 */
+ 7091 "00110010" // /* MW 7 */
+ 7092 "11110011" // /* MW 6 */
+ 7093 "00000001" // /* MW 5 */
+ 7094 "00000000" // /* MW 4 */
+ 7095 "00100000" // /* MW 3 */
+ 7096 "10111110" // /* MW 2 */
+ 7097 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.src_ref 7 "superkernels.cpp" 108 13
+ 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7099 "00010000" // /* MW 9 */
+ 7100 "00100110" // /* MW 8 */
+ 7101 "00110010" // /* MW 7 */
+ 7102 "11110001" // /* MW 6 */
+ 7103 "00000001" // /* MW 5 */
+ 7104 "00000000" // /* MW 4 */
+ 7105 "11010000" // /* MW 3 */
+ 7106 "11000010" // /* MW 2 */
+ 7107 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11
+.src_ref 7 "superkernels.cpp" 108 13 first
+.src_ref 7 "superkernels.cpp" 139 6
+.src_ref 7 "superkernels.cpp" 140 14
+ 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7109 "00010000" // /* MW 9 */
+ 7110 "00100000" // /* MW 8 */
+ 7111 "10110010" // /* MW 7 */
+ 7112 "11110011" // /* MW 6 */
+ 7113 "00000001" // /* MW 5 */
+ 7114 "00000000" // /* MW 4 */
+ 7115 "11010000" // /* MW 3 */
+ 7116 "11000110" // /* MW 2 */
+ 7117 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+ 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7119 "01010110" // /* MW 3 */
+ 7120 "00000110" // /* MW 2 */
+ 7121 "00000111" // /* MW 1 */
+ 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7123 "00000000" // /* MW 1 */
+ 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7125 "00000000" // /* MW 1 */
+ 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7127 "00000000" // /* MW 1 */
+ 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7129 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 110 6 first
+.src_ref 7 "superkernels.cpp" 110 17 first
+ 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */
+ 7131 "00000001" // /* MW 5 */
+ 7132 "01000000" // /* MW 4 */
+ 7133 "00011000" // /* MW 3 */
+ 7134 "00001110" // /* MW 2 */
+ 7135 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 108 13 first
+.delay_slot
+ 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7137 "00000111" // /* MW 3 */
+ 7138 "01100010" // /* MW 2 */
+ 7139 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.src_ref 7 "superkernels.cpp" 108 13
+.delay_slot
+ 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7141 "00001110" // /* MW 5 */
+ 7142 "01000100" // /* MW 4 */
+ 7143 "00111001" // /* MW 3 */
+ 7144 "11000110" // /* MW 2 */
+ 7145 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12 first
+.delay_slot
+ 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7147 "00000111" // /* MW 3 */
+ 7148 "00100110" // /* MW 2 */
+ 7149 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 106 12
+.delay_slot
+ 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7151 "01110001" // /* MW 3 */
+ 7152 "00000110" // /* MW 2 */
+ 7153 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 107 11 first
+.delay_slot
+ 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7155 "00110001" // /* MW 3 */
+ 7156 "00000110" // /* MW 2 */
+ 7157 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7159 "10000110" // /* MW 3 */
+ 7160 "01100111" // /* MW 2 */
+ 7161 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7163 "01110110" // /* MW 3 */
+ 7164 "11111111" // /* MW 2 */
+ 7165 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7167 "00010110" // /* MW 3 */
+ 7168 "11111110" // /* MW 2 */
+ 7169 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7171 "00110110" // /* MW 3 */
+ 7172 "11111110" // /* MW 2 */
+ 7173 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7175 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7177 "00010110" // /* MW 3 */
+ 7178 "01000110" // /* MW 2 */
+ 7179 "00000010" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7181 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7183 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7185 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7187 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7189 "00000010" // /* MW 3 */
+ 7190 "01100001" // /* MW 2 */
+ 7191 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7193 "00010001" // /* MW 3 */
+ 7194 "00000110" // /* MW 2 */
+ 7195 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7197 "11111101" // /* MW 3 */
+ 7198 "11100010" // /* MW 2 */
+ 7199 "00010111" // /* MW 1 */
+ 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7201 "00000000" // /* MW 1 */
+ 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7203 "00000000" // /* MW 1 */
+ 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7207 "00011000" // /* MW 9 */
+ 7208 "00010011" // /* MW 8 */
+ 7209 "00000100" // /* MW 7 */
+ 7210 "00000000" // /* MW 6 */
+ 7211 "01011011" // /* MW 5 */
+ 7212 "00000001" // /* MW 4 */
+ 7213 "11110000" // /* MW 3 */
+ 7214 "00101100" // /* MW 2 */
+ 7215 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336
+.src_ref 7 "superkernels.cpp" 113 2 first
+.no_stack_arguments
+ 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 7217 "00000001" // /* MW 5 */
+ 7218 "00000000" // /* MW 4 */
+ 7219 "10111000" // /* MW 3 */
+ 7220 "00001000" // /* MW 2 */
+ 7221 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7223 "00000000" // /* MW 5 */
+ 7224 "11001100" // /* MW 4 */
+ 7225 "11000110" // /* MW 3 */
+ 7226 "00000111" // /* MW 2 */
+ 7227 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7233 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 113 2
+.delay_slot
+ 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7235 "00011100" // /* MW 13 */
+ 7236 "00000000" // /* MW 12 */
+ 7237 "00000000" // /* MW 11 */
+ 7238 "00000111" // /* MW 10 */
+ 7239 "00111101" // /* MW 9 */
+ 7240 "01010011" // /* MW 8 */
+ 7241 "00000000" // /* MW 7 */
+ 7242 "00000000" // /* MW 6 */
+ 7243 "10110110" // /* MW 5 */
+ 7244 "00000010" // /* MW 4 */
+ 7245 "11110000" // /* MW 3 */
+ 7246 "00101100" // /* MW 2 */
+ 7247 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6 first
+.src_ref 7 "superkernels.cpp" 117 20
+.return_address
+ 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7249 "00010000" // /* MW 9 */
+ 7250 "00100010" // /* MW 8 */
+ 7251 "10110010" // /* MW 7 */
+ 7252 "11110000" // /* MW 6 */
+ 7253 "00000001" // /* MW 5 */
+ 7254 "00000000" // /* MW 4 */
+ 7255 "11010000" // /* MW 3 */
+ 7256 "11000010" // /* MW 2 */
+ 7257 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 20
+ 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7259 "00110110" // /* MW 3 */
+ 7260 "00000110" // /* MW 2 */
+ 7261 "00000001" // /* MW 1 */
+ 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7263 "00010001" // /* MW 3 */
+ 7264 "11110000" // /* MW 2 */
+ 7265 "00000111" // /* MW 1 */
+ 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7267 "00000000" // /* MW 1 */
+ 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7269 "00000000" // /* MW 1 */
+ 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7271 "00000000" // /* MW 1 */
+ 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7273 "00000000" // /* MW 1 */
+ 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7275 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 17
+ 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7277 "00001000" // /* MW 3 */
+ 7278 "01100001" // /* MW 2 */
+ 7279 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 117 6
+ 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */
+ 7281 "00000001" // /* MW 5 */
+ 7282 "01000000" // /* MW 4 */
+ 7283 "01100000" // /* MW 3 */
+ 7284 "00001110" // /* MW 2 */
+ 7285 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15
+.src_ref 7 "superkernels.cpp" 140 14
+.delay_slot
+ 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7287 "00000001" // /* MW 3 */
+ 7288 "00110000" // /* MW 2 */
+ 7289 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7291 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7293 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7295 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7297 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7299 "00010100" // /* MW 5 */
+ 7300 "11001111" // /* MW 4 */
+ 7301 "10100010" // /* MW 3 */
+ 7302 "00000000" // /* MW 2 */
+ 7303 "00000100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7305 "00110110" // /* MW 3 */
+ 7306 "00000110" // /* MW 2 */
+ 7307 "00000001" // /* MW 1 */
+ 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7309 "00000000" // /* MW 1 */
+ 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7311 "00000000" // /* MW 1 */
+ 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7313 "00000000" // /* MW 1 */
+ 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7315 "00000000" // /* MW 1 */
+ 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7317 "00000000" // /* MW 1 */
+ 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7319 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7321 "00001000" // /* MW 3 */
+ 7322 "01010001" // /* MW 2 */
+ 7323 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 136 15 first
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7325 "00100011" // /* MW 5 */
+ 7326 "00001110" // /* MW 4 */
+ 7327 "11011100" // /* MW 3 */
+ 7328 "11000110" // /* MW 2 */
+ 7329 "00111100" // /* MW 1 */
+ 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7331 "00000000" // /* MW 1 */
+ 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7333 "00000000" // /* MW 1 */
+ 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7335 "00000000" // /* MW 1 */
+ 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7337 "00000000" // /* MW 1 */
+ 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7339 "00000000" // /* MW 1 */
+ 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7341 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7343 "00010001" // /* MW 3 */
+ 7344 "00100001" // /* MW 2 */
+ 7345 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 7347 "00011100" // /* MW 13 */
+ 7348 "00000000" // /* MW 12 */
+ 7349 "00000000" // /* MW 11 */
+ 7350 "01010111" // /* MW 10 */
+ 7351 "00011010" // /* MW 9 */
+ 7352 "01000000" // /* MW 8 */
+ 7353 "00000000" // /* MW 7 */
+ 7354 "00000000" // /* MW 6 */
+ 7355 "00100011" // /* MW 5 */
+ 7356 "11001100" // /* MW 4 */
+ 7357 "11110011" // /* MW 3 */
+ 7358 "00101100" // /* MW 2 */
+ 7359 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480
+.src_ref 7 "superkernels.cpp" 139 6 first
+.src_ref 7 "superkernels.cpp" 139 19
+ 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7361 "00010000" // /* MW 9 */
+ 7362 "00110000" // /* MW 8 */
+ 7363 "00110010" // /* MW 7 */
+ 7364 "11110011" // /* MW 6 */
+ 7365 "00000001" // /* MW 5 */
+ 7366 "00000000" // /* MW 4 */
+ 7367 "11010000" // /* MW 3 */
+ 7368 "11000010" // /* MW 2 */
+ 7369 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 19
+ 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7371 "00110110" // /* MW 3 */
+ 7372 "00000110" // /* MW 2 */
+ 7373 "00000110" // /* MW 1 */
+ 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7375 "10011001" // /* MW 3 */
+ 7376 "11111000" // /* MW 2 */
+ 7377 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+ 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7379 "00111001" // /* MW 3 */
+ 7380 "11110100" // /* MW 2 */
+ 7381 "00000111" // /* MW 1 */
+ 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7383 "00000000" // /* MW 1 */
+ 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7385 "00000000" // /* MW 1 */
+ 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7387 "00000000" // /* MW 1 */
+ 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7389 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 16
+ 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7391 "00001000" // /* MW 3 */
+ 7392 "01100001" // /* MW 2 */
+ 7393 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 139 6
+ 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */
+ 7395 "00000001" // /* MW 5 */
+ 7396 "01000000" // /* MW 4 */
+ 7397 "10000000" // /* MW 3 */
+ 7398 "00001110" // /* MW 2 */
+ 7399 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7407 "00000000" // /* MW 1 */
+.delay_slot
+ 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7409 "00100000" // /* MW 3 */
+ 7410 "11010000" // /* MW 2 */
+ 7411 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 140 14 first
+ 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 7413 "11000001" // /* MW 11 */
+ 7414 "10001000" // /* MW 10 */
+ 7415 "10000011" // /* MW 9 */
+ 7416 "00000011" // /* MW 8 */
+ 7417 "00000000" // /* MW 7 */
+ 7418 "00000000" // /* MW 6 */
+ 7419 "00100000" // /* MW 5 */
+ 7420 "00000000" // /* MW 4 */
+ 7421 "11110000" // /* MW 3 */
+ 7422 "00101100" // /* MW 2 */
+ 7423 "00000000" // /* MW 1 */
+.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544
+ 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7425 "00011001" // /* MW 3 */
+ 7426 "11111111" // /* MW 2 */
+ 7427 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142 first
+ 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7429 "00000000" // /* MW 3 */
+ 7430 "00101000" // /* MW 2 */
+ 7431 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 142
+.delay_slot
+ 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7433 "00000001" // /* MW 5 */
+ 7434 "00000000" // /* MW 4 */
+ 7435 "00000000" // /* MW 3 */
+ 7436 "11111000" // /* MW 2 */
+ 7437 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7443 "00000000" // /* MW 1 */
+.delay_slot
+ 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7445 "10001011" // /* MW 3 */
+ 7446 "10000100" // /* MW 2 */
+.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 7447 "00001111" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7457 "00000001" // /* MW 5 */
+ 7458 "00100001" // /* MW 4 */
+ 7459 "00000000" // /* MW 3 */
+ 7460 "00000000" // /* MW 2 */
+ 7461 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7463 "11000000" // /* MW 3 */
+ 7464 "01010000" // /* MW 2 */
+ 7465 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7467 "10010000" // /* MW 3 */
+ 7468 "01100000" // /* MW 2 */
+ 7469 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7471 "00010001" // /* MW 3 */
+ 7472 "00000100" // /* MW 2 */
+ 7473 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7475 "00010001" // /* MW 3 */
+ 7476 "00010100" // /* MW 2 */
+ 7477 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7479 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7489 "00101110" // /* MW 3 */
+ 7490 "00011100" // /* MW 2 */
+ 7491 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7493 "00000001" // /* MW 5 */
+ 7494 "00000000" // /* MW 4 */
+ 7495 "00000000" // /* MW 3 */
+ 7496 "00001000" // /* MW 2 */
+ 7497 "00000000" // /* MW 1 */
+ 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7499 "00111101" // /* MW 3 */
+ 7500 "11111000" // /* MW 2 */
+ 7501 "00001111" // /* MW 1 */
+ 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7503 "11110101" // /* MW 3 */
+ 7504 "11111101" // /* MW 2 */
+ 7505 "00001111" // /* MW 1 */
+ 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7507 "00000000" // /* MW 1 */
+ 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7509 "00000000" // /* MW 1 */
+ 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7511 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7513 "00101001" // /* MW 3 */
+ 7514 "00011100" // /* MW 2 */
+ 7515 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7517 "00101110" // /* MW 3 */
+ 7518 "00011100" // /* MW 2 */
+ 7519 "00000001" // /* MW 1 */
+ 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7521 "00000000" // /* MW 1 */
+ 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7523 "00000000" // /* MW 1 */
+ 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7525 "00000000" // /* MW 1 */
+ 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7527 "00000000" // /* MW 1 */
+ 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7529 "00000000" // /* MW 1 */
+ 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7531 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7533 "00101001" // /* MW 3 */
+ 7534 "00011100" // /* MW 2 */
+ 7535 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7537 "00101110" // /* MW 3 */
+ 7538 "00000100" // /* MW 2 */
+ 7539 "00000001" // /* MW 1 */
+ 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7541 "00000000" // /* MW 1 */
+ 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7543 "00000000" // /* MW 1 */
+ 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7545 "00000000" // /* MW 1 */
+ 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7547 "00000000" // /* MW 1 */
+ 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7549 "00000000" // /* MW 1 */
+ 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7551 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7553 "00101001" // /* MW 3 */
+ 7554 "00011100" // /* MW 2 */
+ 7555 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7557 "00101110" // /* MW 3 */
+ 7558 "00010100" // /* MW 2 */
+ 7559 "00000001" // /* MW 1 */
+ 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7561 "00000000" // /* MW 1 */
+ 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7563 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */
+ 7565 "00000001" // /* MW 5 */
+ 7566 "00000000" // /* MW 4 */
+ 7567 "10010000" // /* MW 3 */
+ 7568 "00001110" // /* MW 2 */
+ 7569 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7573 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7575 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7577 "00101001" // /* MW 3 */
+ 7578 "11011100" // /* MW 2 */
+ 7579 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.delay_slot
+ 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7581 "11000000" // /* MW 3 */
+ 7582 "11010000" // /* MW 2 */
+ 7583 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7585 "00001000" // /* MW 9 */
+ 7586 "11000100" // /* MW 8 */
+ 7587 "00110011" // /* MW 7 */
+ 7588 "01101000" // /* MW 6 */
+ 7589 "00000000" // /* MW 5 */
+ 7590 "00000001" // /* MW 4 */
+ 7591 "00100000" // /* MW 3 */
+ 7592 "00000111" // /* MW 2 */
+ 7593 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7595 "01011000" // /* MW 9 */
+ 7596 "11111101" // /* MW 8 */
+ 7597 "00000111" // /* MW 7 */
+ 7598 "00001000" // /* MW 6 */
+ 7599 "10000000" // /* MW 5 */
+ 7600 "00000001" // /* MW 4 */
+ 7601 "10000000" // /* MW 3 */
+ 7602 "11100010" // /* MW 2 */
+ 7603 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7605 "00000001" // /* MW 9 */
+ 7606 "10100000" // /* MW 8 */
+ 7607 "00000111" // /* MW 7 */
+ 7608 "10000000" // /* MW 6 */
+ 7609 "00010001" // /* MW 5 */
+ 7610 "00001010" // /* MW 4 */
+ 7611 "00100000" // /* MW 3 */
+ 7612 "10111110" // /* MW 2 */
+ 7613 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7615 "01001010" // /* MW 3 */
+ 7616 "00000110" // /* MW 2 */
+ 7617 "00000000" // /* MW 1 */
+ 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7619 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7621 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7623 "00010111" // /* MW 3 */
+ 7624 "00000010" // /* MW 2 */
+ 7625 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7627 "00000000" // /* MW 3 */
+ 7628 "00101000" // /* MW 2 */
+ 7629 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7631 "00000101" // /* MW 3 */
+ 7632 "00100010" // /* MW 2 */
+ 7633 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7635 "00000001" // /* MW 5 */
+ 7636 "00000000" // /* MW 4 */
+ 7637 "00000000" // /* MW 3 */
+ 7638 "11111000" // /* MW 2 */
+ 7639 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7641 "00100111" // /* MW 3 */
+ 7642 "01110111" // /* MW 2 */
+ 7643 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7645 "10000010" // /* MW 3 */
+ 7646 "00100001" // /* MW 2 */
+ 7647 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7649 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 81 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25
+.function_start
+ 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7665 "01111000" // /* MW 9 */
+ 7666 "01100000" // /* MW 8 */
+ 7667 "00001000" // /* MW 7 */
+ 7668 "11001000" // /* MW 6 */
+ 7669 "00010000" // /* MW 5 */
+ 7670 "00000000" // /* MW 4 */
+ 7671 "10000000" // /* MW 3 */
+ 7672 "10000000" // /* MW 2 */
+ 7673 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 81
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33
+ 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7675 "00001100" // /* MW 5 */
+ 7676 "11000000" // /* MW 4 */
+ 7677 "10100000" // /* MW 3 */
+ 7678 "00000000" // /* MW 2 */
+ 7679 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+ 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7681 "01001010" // /* MW 3 */
+ 7682 "00001000" // /* MW 2 */
+ 7683 "00000000" // /* MW 1 */
+ 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7685 "00000000" // /* MW 1 */
+ 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7687 "00000000" // /* MW 1 */
+ 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7689 "00000000" // /* MW 1 */
+ 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7691 "00000000" // /* MW 1 */
+ 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7693 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first
+ 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 7695 "00000000" // /* MW 3 */
+ 7696 "00101000" // /* MW 2 */
+ 7697 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.delay_slot
+ 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7699 "00001000" // /* MW 3 */
+ 7700 "10000000" // /* MW 2 */
+ 7701 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first
+.delay_slot
+ 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7703 "00011101" // /* MW 3 */
+ 7704 "00000000" // /* MW 2 */
+ 7705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 83 23
+.delay_slot
+ 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7707 "11100000" // /* MW 5 */
+ 7708 "00001101" // /* MW 4 */
+ 7709 "00110001" // /* MW 3 */
+ 7710 "10000010" // /* MW 2 */
+ 7711 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 25
+.delay_slot
+ 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7713 "00011101" // /* MW 3 */
+ 7714 "11000100" // /* MW 2 */
+ 7715 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 82 23
+.delay_slot
+ 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7717 "01010001" // /* MW 3 */
+ 7718 "00000100" // /* MW 2 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0
+ 7719 "00001000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_broadcasting.h" 76
+.src_ref 3 "elementwise_binary_broadcasting.h" 76 first
+.function_start
+ 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7729 "00000001" // /* MW 5 */
+ 7730 "00000000" // /* MW 4 */
+ 7731 "00000000" // /* MW 3 */
+ 7732 "00001000" // /* MW 2 */
+ 7733 "00000000" // /* MW 1 */
+ 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7735 "00111101" // /* MW 3 */
+ 7736 "11111100" // /* MW 2 */
+ 7737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first
+.no_stack_arguments
+ 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */
+ 7739 "00000001" // /* MW 5 */
+ 7740 "00000000" // /* MW 4 */
+ 7741 "10100000" // /* MW 3 */
+ 7742 "00001110" // /* MW 2 */
+ 7743 "00000000" // /* MW 1 */
+.delay_slot
+ 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7745 "10011101" // /* MW 3 */
+ 7746 "11111011" // /* MW 2 */
+ 7747 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+ 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7749 "11000000" // /* MW 3 */
+ 7750 "01100000" // /* MW 2 */
+ 7751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7757 "01100111" // /* MW 3 */
+ 7758 "00000001" // /* MW 2 */
+ 7759 "00000000" // /* MW 1 */
+.return_address
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7761 "10011001" // /* MW 3 */
+ 7762 "11111011" // /* MW 2 */
+ 7763 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7765 "00111001" // /* MW 3 */
+ 7766 "11111100" // /* MW 2 */
+ 7767 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first
+.tail_call
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */
+ 7769 "00000000" // /* MW 5 */
+ 7770 "00000000" // /* MW 4 */
+ 7771 "11111000" // /* MW 3 */
+ 7772 "00001110" // /* MW 2 */
+ 7773 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 78 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7775 "11000000" // /* MW 3 */
+ 7776 "01101110" // /* MW 2 */
+ 7777 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first
+.delay_slot
+ 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7779 "00000001" // /* MW 5 */
+ 7780 "00000000" // /* MW 4 */
+ 7781 "00000000" // /* MW 3 */
+ 7782 "11111000" // /* MW 2 */
+ 7783 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7787 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 7789 "00000000" // /* MW 1 */
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_broadcasting.h" 89 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19
+.function_start
+ 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7793 "01010001" // /* MW 5 */
+ 7794 "00000000" // /* MW 4 */
+ 7795 "11010000" // /* MW 3 */
+ 7796 "10000010" // /* MW 2 */
+ 7797 "01100111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7799 "10000001" // /* MW 5 */
+ 7800 "11001101" // /* MW 4 */
+ 7801 "01011000" // /* MW 3 */
+ 7802 "00000101" // /* MW 2 */
+ 7803 "01100001" // /* MW 1 */
+ 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7805 "00000000" // /* MW 1 */
+ 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7807 "00000000" // /* MW 1 */
+ 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7809 "00000000" // /* MW 1 */
+ 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7811 "00000000" // /* MW 1 */
+ 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7813 "00000000" // /* MW 1 */
+ 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7815 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 12
+.src_ref 3 "elementwise_binary_broadcasting.h" 102 35
+ 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */
+ 7817 "00000001" // /* MW 5 */
+ 7818 "01000000" // /* MW 4 */
+ 7819 "01100000" // /* MW 3 */
+ 7820 "00001111" // /* MW 2 */
+ 7821 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78
+.delay_slot
+ 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7823 "11101001" // /* MW 3 */
+ 7824 "11000100" // /* MW 2 */
+ 7825 "00010111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first
+.delay_slot
+ 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7827 "00101101" // /* MW 3 */
+ 7828 "00000000" // /* MW 2 */
+ 7829 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7835 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first
+ 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7837 "00110010" // /* MW 3 */
+ 7838 "00000100" // /* MW 2 */
+ 7839 "00000000" // /* MW 1 */
+ 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7841 "00000000" // /* MW 1 */
+ 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7843 "00000000" // /* MW 1 */
+ 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7845 "00000000" // /* MW 1 */
+ 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */
+ 7847 "00000000" // /* MW 5 */
+ 7848 "00000000" // /* MW 4 */
+ 7849 "01110000" // /* MW 3 */
+ 7850 "00001111" // /* MW 2 */
+ 7851 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7853 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.delay_slot
+ 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7857 "01110010" // /* MW 3 */
+ 7858 "00000101" // /* MW 2 */
+ 7859 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7861 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7863 "00000000" // /* MW 9 */
+ 7864 "00000000" // /* MW 8 */
+ 7865 "00000000" // /* MW 7 */
+ 7866 "00000000" // /* MW 6 */
+ 7867 "00010011" // /* MW 5 */
+ 7868 "00000100" // /* MW 4 */
+ 7869 "11110000" // /* MW 3 */
+ 7870 "00101100" // /* MW 2 */
+ 7871 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80
+.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first
+ 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7873 "00110010" // /* MW 3 */
+ 7874 "00000100" // /* MW 2 */
+ 7875 "00000001" // /* MW 1 */
+ 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7877 "00000000" // /* MW 1 */
+ 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7879 "00000000" // /* MW 1 */
+ 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7881 "00000000" // /* MW 1 */
+ 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7883 "00000000" // /* MW 1 */
+ 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7885 "00000000" // /* MW 1 */
+ 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7887 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7889 "01110010" // /* MW 3 */
+ 7890 "00000101" // /* MW 2 */
+ 7891 "00011000" // /* MW 1 */
+ 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7893 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7895 "00000000" // /* MW 9 */
+ 7896 "00000000" // /* MW 8 */
+ 7897 "00000000" // /* MW 7 */
+ 7898 "00000000" // /* MW 6 */
+ 7899 "00010011" // /* MW 5 */
+ 7900 "00000100" // /* MW 4 */
+ 7901 "11110001" // /* MW 3 */
+ 7902 "00101100" // /* MW 2 */
+ 7903 "00000000" // /* MW 1 */
+.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first
+ 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7905 "01001000" // /* MW 9 */
+ 7906 "00111111" // /* MW 8 */
+ 7907 "10111000" // /* MW 7 */
+ 7908 "10001010" // /* MW 6 */
+ 7909 "00000111" // /* MW 5 */
+ 7910 "00000000" // /* MW 4 */
+ 7911 "11010000" // /* MW 3 */
+ 7912 "10000000" // /* MW 2 */
+ 7913 "10001010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 7915 "00010000" // /* MW 9 */
+ 7916 "10101000" // /* MW 8 */
+ 7917 "01111111" // /* MW 7 */
+ 7918 "00000100" // /* MW 6 */
+ 7919 "00000000" // /* MW 5 */
+ 7920 "00000000" // /* MW 4 */
+ 7921 "11010000" // /* MW 3 */
+ 7922 "10010000" // /* MW 2 */
+ 7923 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7925 "11100000" // /* MW 5 */
+ 7926 "11111110" // /* MW 4 */
+ 7927 "00010110" // /* MW 3 */
+ 7928 "00000000" // /* MW 2 */
+ 7929 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7931 "11010000" // /* MW 5 */
+ 7932 "11001000" // /* MW 4 */
+ 7933 "11001000" // /* MW 3 */
+ 7934 "00000111" // /* MW 2 */
+ 7935 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_broadcasting.h" 117 8
+ 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7937 "00100010" // /* MW 3 */
+ 7938 "00000100" // /* MW 2 */
+ 7939 "00000100" // /* MW 1 */
+ 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7941 "00000000" // /* MW 1 */
+ 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 7943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7945 "10101011" // /* MW 3 */
+ 7946 "00001000" // /* MW 2 */
+ 7947 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 189 20 first
+ 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7949 "00101011" // /* MW 3 */
+ 7950 "00101001" // /* MW 2 */
+ 7951 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+ 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7953 "00101011" // /* MW 3 */
+ 7954 "00001000" // /* MW 2 */
+ 7955 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7957 "00101011" // /* MW 3 */
+ 7958 "00101010" // /* MW 2 */
+ 7959 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 7961 "00000000" // /* MW 5 */
+ 7962 "11110101" // /* MW 4 */
+ 7963 "01110000" // /* MW 3 */
+ 7964 "00010101" // /* MW 2 */
+ 7965 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7967 "00111101" // /* MW 7 */
+ 7968 "00101000" // /* MW 6 */
+ 7969 "00000011" // /* MW 5 */
+ 7970 "00000100" // /* MW 4 */
+ 7971 "01110000" // /* MW 3 */
+ 7972 "00100101" // /* MW 2 */
+ 7973 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7975 "00101011" // /* MW 3 */
+ 7976 "00001000" // /* MW 2 */
+ 7977 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7979 "00111101" // /* MW 7 */
+ 7980 "00010000" // /* MW 6 */
+ 7981 "00000100" // /* MW 5 */
+ 7982 "00000100" // /* MW 4 */
+ 7983 "01110000" // /* MW 3 */
+ 7984 "01000101" // /* MW 2 */
+ 7985 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7987 "10101011" // /* MW 3 */
+ 7988 "00001000" // /* MW 2 */
+ 7989 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 7991 "00111101" // /* MW 7 */
+ 7992 "00101000" // /* MW 6 */
+ 7993 "00000011" // /* MW 5 */
+ 7994 "00000100" // /* MW 4 */
+ 7995 "01110000" // /* MW 3 */
+ 7996 "00100101" // /* MW 2 */
+ 7997 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 7999 "00101011" // /* MW 3 */
+ 8000 "00001000" // /* MW 2 */
+ 8001 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8003 "00111101" // /* MW 13 */
+ 8004 "00010000" // /* MW 12 */
+ 8005 "00000100" // /* MW 11 */
+ 8006 "01010111" // /* MW 10 */
+ 8007 "00011010" // /* MW 9 */
+ 8008 "01000000" // /* MW 8 */
+ 8009 "00000000" // /* MW 7 */
+ 8010 "00000000" // /* MW 6 */
+ 8011 "01000110" // /* MW 5 */
+ 8012 "00111011" // /* MW 4 */
+ 8013 "01110100" // /* MW 3 */
+ 8014 "01000101" // /* MW 2 */
+ 8015 "00100101" // /* MW 1 */
+.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8017 "10101011" // /* MW 3 */
+ 8018 "00001000" // /* MW 2 */
+ 8019 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8021 "00111101" // /* MW 11 */
+ 8022 "00101000" // /* MW 10 */
+ 8023 "00000011" // /* MW 9 */
+ 8024 "10001110" // /* MW 8 */
+ 8025 "00010001" // /* MW 7 */
+ 8026 "00001111" // /* MW 6 */
+ 8027 "00100001" // /* MW 5 */
+ 8028 "00000000" // /* MW 4 */
+ 8029 "01110000" // /* MW 3 */
+ 8030 "00100101" // /* MW 2 */
+ 8031 "00100101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8033 "00000000" // /* MW 15 */
+ 8034 "00000000" // /* MW 14 */
+ 8035 "01111000" // /* MW 13 */
+ 8036 "10100101" // /* MW 12 */
+ 8037 "00000001" // /* MW 11 */
+ 8038 "00000000" // /* MW 10 */
+ 8039 "00000000" // /* MW 9 */
+ 8040 "00000000" // /* MW 8 */
+ 8041 "01011011" // /* MW 7 */
+ 8042 "00000001" // /* MW 6 */
+ 8043 "00100000" // /* MW 5 */
+ 8044 "00000000" // /* MW 4 */
+ 8045 "01110000" // /* MW 3 */
+ 8046 "00000101" // /* MW 2 */
+ 8047 "00000001" // /* MW 1 */
+.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8049 "10000001" // /* MW 15 */
+ 8050 "00100000" // /* MW 14 */
+ 8051 "01111000" // /* MW 13 */
+ 8052 "10100101" // /* MW 12 */
+ 8053 "00000001" // /* MW 11 */
+ 8054 "00000000" // /* MW 10 */
+ 8055 "00000000" // /* MW 9 */
+ 8056 "00000000" // /* MW 8 */
+ 8057 "10100011" // /* MW 7 */
+ 8058 "00011101" // /* MW 6 */
+ 8059 "00100010" // /* MW 5 */
+ 8060 "00000000" // /* MW 4 */
+ 8061 "01110000" // /* MW 3 */
+ 8062 "01000101" // /* MW 2 */
+ 8063 "00100101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8065 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "add_accum.hpp" 19 92
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8067 "00111101" // /* MW 7 */
+ 8068 "00101000" // /* MW 6 */
+ 8069 "00000011" // /* MW 5 */
+ 8070 "00000010" // /* MW 4 */
+ 8071 "01100000" // /* MW 3 */
+ 8072 "11000100" // /* MW 2 */
+ 8073 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8075 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "add_accum.hpp" 19 92 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8077 "00111101" // /* MW 7 */
+ 8078 "00010000" // /* MW 6 */
+ 8079 "00000100" // /* MW 5 */
+ 8080 "00000010" // /* MW 4 */
+ 8081 "01100000" // /* MW 3 */
+ 8082 "10110100" // /* MW 2 */
+ 8083 "01000011" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8085 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8087 "00000000" // /* MW 5 */
+ 8088 "01010000" // /* MW 4 */
+ 8089 "01100000" // /* MW 3 */
+ 8090 "11000100" // /* MW 2 */
+ 8091 "01000011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8093 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8095 "10100011" // /* MW 3 */
+ 8096 "00011101" // /* MW 2 */
+ 8097 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8099 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8101 "00100011" // /* MW 3 */
+ 8102 "00011110" // /* MW 2 */
+ 8103 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8105 "00000000" // /* MW 1 */
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first
+.function_start
+ 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8113 "00000001" // /* MW 5 */
+ 8114 "00000000" // /* MW 4 */
+ 8115 "00000000" // /* MW 3 */
+ 8116 "00010000" // /* MW 2 */
+ 8117 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24
+ 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8119 "01110000" // /* MW 7 */
+ 8120 "01100000" // /* MW 6 */
+ 8121 "00001010" // /* MW 5 */
+ 8122 "00000010" // /* MW 4 */
+ 8123 "10110000" // /* MW 3 */
+ 8124 "10000111" // /* MW 2 */
+ 8125 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+ 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8127 "00000000" // /* MW 7 */
+ 8128 "00000011" // /* MW 6 */
+ 8129 "10110100" // /* MW 5 */
+ 8130 "00000001" // /* MW 4 */
+ 8131 "01100000" // /* MW 3 */
+ 8132 "10010001" // /* MW 2 */
+ 8133 "01010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+ 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8135 "10000001" // /* MW 5 */
+ 8136 "00100001" // /* MW 4 */
+ 8137 "01011000" // /* MW 3 */
+ 8138 "11101101" // /* MW 2 */
+ 8139 "01100101" // /* MW 1 */
+ 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8141 "11000001" // /* MW 5 */
+ 8142 "10101011" // /* MW 4 */
+ 8143 "01011000" // /* MW 3 */
+ 8144 "11001010" // /* MW 2 */
+ 8145 "01110011" // /* MW 1 */
+ 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8147 "11000000" // /* MW 3 */
+ 8148 "01101000" // /* MW 2 */
+ 8149 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+ 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8151 "00101011" // /* MW 3 */
+ 8152 "00000111" // /* MW 2 */
+ 8153 "00001000" // /* MW 1 */
+ 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8155 "01010111" // /* MW 3 */
+ 8156 "00000110" // /* MW 2 */
+ 8157 "00000000" // /* MW 1 */
+ 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8159 "00000000" // /* MW 1 */
+ 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8161 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first
+.no_stack_arguments
+ 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */
+ 8163 "00000001" // /* MW 5 */
+ 8164 "00000000" // /* MW 4 */
+ 8165 "00111000" // /* MW 3 */
+ 8166 "00001111" // /* MW 2 */
+ 8167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.delay_slot
+ 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8169 "11000000" // /* MW 3 */
+ 8170 "01010000" // /* MW 2 */
+ 8171 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first
+.delay_slot
+ 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8175 "00010010" // /* MW 3 */
+ 8176 "00100101" // /* MW 2 */
+ 8177 "00010100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8179 "01000001" // /* MW 5 */
+ 8180 "11010010" // /* MW 4 */
+ 8181 "01000010" // /* MW 3 */
+ 8182 "00100000" // /* MW 2 */
+ 8183 "10001100" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8
+.delay_slot
+ 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8185 "01110000" // /* MW 7 */
+ 8186 "00010000" // /* MW 6 */
+ 8187 "00110100" // /* MW 5 */
+ 8188 "00000000" // /* MW 4 */
+ 8189 "01100000" // /* MW 3 */
+ 8190 "00101011" // /* MW 2 */
+ 8191 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.return_address
+ 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8193 "00111001" // /* MW 3 */
+ 8194 "11111100" // /* MW 2 */
+ 8195 "00000111" // /* MW 1 */
+ 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8197 "00000000" // /* MW 1 */
+ 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8199 "00000000" // /* MW 1 */
+ 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8201 "00000000" // /* MW 1 */
+ 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8203 "00000000" // /* MW 1 */
+ 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8205 "00000000" // /* MW 1 */
+ 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8207 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first
+ 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8209 "00000000" // /* MW 3 */
+ 8210 "00101000" // /* MW 2 */
+ 8211 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4
+.delay_slot
+ 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8213 "00000001" // /* MW 5 */
+ 8214 "00000000" // /* MW 4 */
+ 8215 "00000000" // /* MW 3 */
+ 8216 "11110000" // /* MW 2 */
+ 8217 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8219 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8221 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8223 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 8225 "00000000" // /* MW 1 */
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 147 first
+.src_ref 7 "superkernels.cpp" 152 6
+.function_start
+ 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8241 "10000000" // /* MW 5 */
+ 8242 "11001000" // /* MW 4 */
+ 8243 "11000110" // /* MW 3 */
+ 8244 "00000111" // /* MW 2 */
+ 8245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6 first
+ 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8247 "11000001" // /* MW 5 */
+ 8248 "10110101" // /* MW 4 */
+ 8249 "11011000" // /* MW 3 */
+ 8250 "11000010" // /* MW 2 */
+ 8251 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 147
+ 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8253 "00000001" // /* MW 5 */
+ 8254 "00000000" // /* MW 4 */
+ 8255 "00000000" // /* MW 3 */
+ 8256 "00001000" // /* MW 2 */
+ 8257 "00000000" // /* MW 1 */
+ 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8259 "01110000" // /* MW 7 */
+ 8260 "11010000" // /* MW 6 */
+ 8261 "00001011" // /* MW 5 */
+ 8262 "00000000" // /* MW 4 */
+ 8263 "10110000" // /* MW 3 */
+ 8264 "01100011" // /* MW 2 */
+ 8265 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+ 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8267 "00010001" // /* MW 9 */
+ 8268 "00101000" // /* MW 8 */
+ 8269 "00110010" // /* MW 7 */
+ 8270 "11110011" // /* MW 6 */
+ 8271 "00000001" // /* MW 5 */
+ 8272 "00000000" // /* MW 4 */
+ 8273 "10110000" // /* MW 3 */
+ 8274 "10000010" // /* MW 2 */
+ 8275 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8277 "11000000" // /* MW 3 */
+ 8278 "11010100" // /* MW 2 */
+ 8279 "00011011" // /* MW 1 */
+ 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8281 "00000000" // /* MW 1 */
+ 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8283 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 152 6
+.src_ref 7 "superkernels.cpp" 152 16
+ 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */
+ 8285 "00000001" // /* MW 5 */
+ 8286 "01000000" // /* MW 4 */
+ 8287 "10000000" // /* MW 3 */
+ 8288 "00010000" // /* MW 2 */
+ 8289 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 22 first
+.delay_slot
+ 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8291 "10010000" // /* MW 3 */
+ 8292 "01100010" // /* MW 2 */
+ 8293 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 30
+.delay_slot
+ 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8295 "11111011" // /* MW 3 */
+ 8296 "01100011" // /* MW 2 */
+ 8297 "00010100" // /* MW 1 */
+.delay_slot
+ 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8299 "00111101" // /* MW 3 */
+ 8300 "11110100" // /* MW 2 */
+ 8301 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 149 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8303 "01110000" // /* MW 7 */
+ 8304 "01100000" // /* MW 6 */
+ 8305 "00110000" // /* MW 5 */
+ 8306 "00000011" // /* MW 4 */
+ 8307 "00110000" // /* MW 3 */
+ 8308 "11000110" // /* MW 2 */
+ 8309 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4
+.src_ref 7 "superkernels.cpp" 166 2
+.delay_slot
+ 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8311 "10000000" // /* MW 5 */
+ 8312 "11001001" // /* MW 4 */
+ 8313 "11000000" // /* MW 3 */
+ 8314 "00000111" // /* MW 2 */
+ 8315 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8317 "11010000" // /* MW 5 */
+ 8318 "11001000" // /* MW 4 */
+ 8319 "11000100" // /* MW 3 */
+ 8320 "00000111" // /* MW 2 */
+ 8321 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8323 "00010000" // /* MW 9 */
+ 8324 "00110010" // /* MW 8 */
+ 8325 "00110010" // /* MW 7 */
+ 8326 "11110001" // /* MW 6 */
+ 8327 "00000001" // /* MW 5 */
+ 8328 "00000000" // /* MW 4 */
+ 8329 "11100000" // /* MW 3 */
+ 8330 "11000000" // /* MW 2 */
+ 8331 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8333 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 155 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */
+ 8335 "00000001" // /* MW 5 */
+ 8336 "00000000" // /* MW 4 */
+ 8337 "00011000" // /* MW 3 */
+ 8338 "00001111" // /* MW 2 */
+ 8339 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8343 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8345 "00110001" // /* MW 3 */
+ 8346 "00100000" // /* MW 2 */
+ 8347 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8349 "00000101" // /* MW 3 */
+ 8350 "00100000" // /* MW 2 */
+ 8351 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8353 "00000000" // /* MW 15 */
+ 8354 "00000000" // /* MW 14 */
+ 8355 "01111000" // /* MW 13 */
+ 8356 "10100101" // /* MW 12 */
+ 8357 "00000001" // /* MW 11 */
+ 8358 "00000000" // /* MW 10 */
+ 8359 "00000000" // /* MW 9 */
+ 8360 "10000000" // /* MW 8 */
+ 8361 "00010001" // /* MW 7 */
+ 8362 "00000110" // /* MW 6 */
+ 8363 "00100010" // /* MW 5 */
+ 8364 "00000000" // /* MW 4 */
+ 8365 "11110000" // /* MW 3 */
+ 8366 "00101100" // /* MW 2 */
+ 8367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18
+.return_address
+ 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8369 "10100000" // /* MW 5 */
+ 8370 "11001000" // /* MW 4 */
+ 8371 "11000100" // /* MW 3 */
+ 8372 "00000111" // /* MW 2 */
+ 8373 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 18 first
+.src_ref 7 "superkernels.cpp" 159 65
+ 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8375 "00010000" // /* MW 9 */
+ 8376 "01100000" // /* MW 8 */
+ 8377 "00110010" // /* MW 7 */
+ 8378 "11110001" // /* MW 6 */
+ 8379 "00000001" // /* MW 5 */
+ 8380 "00000000" // /* MW 4 */
+ 8381 "11010000" // /* MW 3 */
+ 8382 "11000010" // /* MW 2 */
+ 8383 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51
+.src_ref 7 "superkernels.cpp" 159 65
+.src_ref 7 "superkernels.cpp" 166 2
+ 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8385 "00010000" // /* MW 9 */
+ 8386 "01100000" // /* MW 8 */
+ 8387 "00110010" // /* MW 7 */
+ 8388 "11110001" // /* MW 6 */
+ 8389 "00000001" // /* MW 5 */
+ 8390 "00000000" // /* MW 4 */
+ 8391 "11010000" // /* MW 3 */
+ 8392 "11000110" // /* MW 2 */
+ 8393 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 51 first
+.src_ref 7 "superkernels.cpp" 159 16
+.src_ref 7 "superkernels.cpp" 164 47
+ 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8395 "00010000" // /* MW 9 */
+ 8396 "00101010" // /* MW 8 */
+ 8397 "10110010" // /* MW 7 */
+ 8398 "11110000" // /* MW 6 */
+ 8399 "00000001" // /* MW 5 */
+ 8400 "00000000" // /* MW 4 */
+ 8401 "01010000" // /* MW 3 */
+ 8402 "11001011" // /* MW 2 */
+ 8403 "01001010" // /* MW 1 */
+ 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8405 "00000000" // /* MW 1 */
+ 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8407 "00000000" // /* MW 1 */
+ 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */
+ 8409 "00000000" // /* MW 5 */
+ 8410 "00000000" // /* MW 4 */
+ 8411 "10001000" // /* MW 3 */
+ 8412 "00010000" // /* MW 2 */
+ 8413 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13
+.delay_slot
+ 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8415 "11000000" // /* MW 5 */
+ 8416 "11001000" // /* MW 4 */
+ 8417 "11000000" // /* MW 3 */
+ 8418 "00000111" // /* MW 2 */
+ 8419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8421 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 27 first
+.delay_slot
+ 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8423 "00001111" // /* MW 3 */
+ 8424 "01100001" // /* MW 2 */
+ 8425 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 157 13 first
+.delay_slot
+ 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8427 "10100011" // /* MW 5 */
+ 8428 "00001100" // /* MW 4 */
+ 8429 "11110000" // /* MW 3 */
+ 8430 "00101100" // /* MW 2 */
+ 8431 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 159 16 first
+.delay_slot
+ 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8433 "00000000" // /* MW 15 */
+ 8434 "00000000" // /* MW 14 */
+ 8435 "01111000" // /* MW 13 */
+ 8436 "10100101" // /* MW 12 */
+ 8437 "00000001" // /* MW 11 */
+ 8438 "00000000" // /* MW 10 */
+ 8439 "00000000" // /* MW 9 */
+ 8440 "10000000" // /* MW 8 */
+ 8441 "00010001" // /* MW 7 */
+ 8442 "00000110" // /* MW 6 */
+ 8443 "00100001" // /* MW 5 */
+ 8444 "00000000" // /* MW 4 */
+ 8445 "11110000" // /* MW 3 */
+ 8446 "00101100" // /* MW 2 */
+ 8447 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 164 47
+.src_ref 7 "superkernels.cpp" 166 2
+ 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8449 "00000000" // /* MW 15 */
+ 8450 "00000000" // /* MW 14 */
+ 8451 "00010000" // /* MW 13 */
+ 8452 "00101010" // /* MW 12 */
+ 8453 "10110010" // /* MW 11 */
+ 8454 "11110000" // /* MW 10 */
+ 8455 "00000001" // /* MW 9 */
+ 8456 "00000000" // /* MW 8 */
+ 8457 "10001011" // /* MW 7 */
+ 8458 "10000000" // /* MW 6 */
+ 8459 "00100010" // /* MW 5 */
+ 8460 "00000000" // /* MW 4 */
+ 8461 "11110000" // /* MW 3 */
+ 8462 "00101100" // /* MW 2 */
+ 8463 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 8465 "00000000" // /* MW 7 */
+ 8466 "11000011" // /* MW 6 */
+ 8467 "10110011" // /* MW 5 */
+ 8468 "00000011" // /* MW 4 */
+ 8469 "01100000" // /* MW 3 */
+ 8470 "10010001" // /* MW 2 */
+ 8471 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8473 "00010000" // /* MW 9 */
+ 8474 "00100000" // /* MW 8 */
+ 8475 "00110010" // /* MW 7 */
+ 8476 "11110000" // /* MW 6 */
+ 8477 "00000001" // /* MW 5 */
+ 8478 "00000000" // /* MW 4 */
+ 8479 "11010000" // /* MW 3 */
+ 8480 "11101110" // /* MW 2 */
+ 8481 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8483 "00010110" // /* MW 3 */
+ 8484 "11111110" // /* MW 2 */
+ 8485 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8487 "00110110" // /* MW 3 */
+ 8488 "11111110" // /* MW 2 */
+ 8489 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8491 "01010110" // /* MW 3 */
+ 8492 "01000110" // /* MW 2 */
+ 8493 "00000111" // /* MW 1 */
+ 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8495 "00000000" // /* MW 1 */
+ 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8497 "00000000" // /* MW 1 */
+ 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8499 "00000000" // /* MW 1 */
+ 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8501 "00000000" // /* MW 1 */
+ 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8503 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8505 "00000010" // /* MW 3 */
+ 8506 "01100001" // /* MW 2 */
+ 8507 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8509 "00010001" // /* MW 3 */
+ 8510 "00000110" // /* MW 2 */
+ 8511 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8513 "11111101" // /* MW 3 */
+ 8514 "11100000" // /* MW 2 */
+ 8515 "00010111" // /* MW 1 */
+ 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8517 "00000000" // /* MW 1 */
+ 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8519 "00000000" // /* MW 1 */
+ 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8521 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8523 "00001000" // /* MW 3 */
+ 8524 "10010011" // /* MW 2 */
+ 8525 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+ 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8527 "10000001" // /* MW 5 */
+ 8528 "10101101" // /* MW 4 */
+ 8529 "10100111" // /* MW 3 */
+ 8530 "00000000" // /* MW 2 */
+ 8531 "00000100" // /* MW 1 */
+ 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8533 "00000000" // /* MW 1 */
+ 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8535 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+ 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8537 "00110110" // /* MW 3 */
+ 8538 "00000110" // /* MW 2 */
+ 8539 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8541 "10000001" // /* MW 5 */
+ 8542 "11011101" // /* MW 4 */
+ 8543 "11011100" // /* MW 3 */
+ 8544 "11001010" // /* MW 2 */
+ 8545 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 47 first
+ 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8547 "01110110" // /* MW 3 */
+ 8548 "00000110" // /* MW 2 */
+ 8549 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8551 "10011110" // /* MW 3 */
+ 8552 "01011100" // /* MW 2 */
+ 8553 "00000111" // /* MW 1 */
+ 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 166 2 first
+.no_stack_arguments
+ 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */
+ 8557 "00000001" // /* MW 5 */
+ 8558 "00000000" // /* MW 4 */
+ 8559 "11011000" // /* MW 3 */
+ 8560 "00001111" // /* MW 2 */
+ 8561 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8563 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2 first
+.delay_slot
+ 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8565 "00000111" // /* MW 3 */
+ 8566 "01100010" // /* MW 2 */
+ 8567 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 163 2
+.delay_slot
+ 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8569 "00110001" // /* MW 3 */
+ 8570 "00000110" // /* MW 2 */
+ 8571 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45 first
+.delay_slot
+ 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8573 "00001101" // /* MW 3 */
+ 8574 "11100001" // /* MW 2 */
+ 8575 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 164 45
+.delay_slot
+ 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8577 "00000000" // /* MW 15 */
+ 8578 "00000000" // /* MW 14 */
+ 8579 "10101000" // /* MW 13 */
+ 8580 "10100000" // /* MW 12 */
+ 8581 "00110100" // /* MW 11 */
+ 8582 "00000000" // /* MW 10 */
+ 8583 "00000000" // /* MW 9 */
+ 8584 "00000000" // /* MW 8 */
+ 8585 "01011011" // /* MW 7 */
+ 8586 "00000001" // /* MW 6 */
+ 8587 "00100000" // /* MW 5 */
+ 8588 "00000000" // /* MW 4 */
+ 8589 "11110000" // /* MW 3 */
+ 8590 "00101100" // /* MW 2 */
+ 8591 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+.src_ref 7 "superkernels.cpp" 169 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8593 "00010000" // /* MW 9 */
+ 8594 "00100000" // /* MW 8 */
+ 8595 "00110010" // /* MW 7 */
+ 8596 "11110011" // /* MW 6 */
+ 8597 "00000001" // /* MW 5 */
+ 8598 "00000000" // /* MW 4 */
+ 8599 "11010000" // /* MW 3 */
+ 8600 "11000110" // /* MW 2 */
+ 8601 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8603 "00000101" // /* MW 3 */
+ 8604 "00100000" // /* MW 2 */
+ 8605 "00010000" // /* MW 1 */
+ 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8607 "00000000" // /* MW 1 */
+ 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8609 "00000000" // /* MW 1 */
+ 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8611 "00000000" // /* MW 1 */
+ 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8613 "00000000" // /* MW 1 */
+ 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8615 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8617 "00001000" // /* MW 3 */
+ 8618 "01010001" // /* MW 2 */
+ 8619 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8621 "00010000" // /* MW 9 */
+ 8622 "00110000" // /* MW 8 */
+ 8623 "00110010" // /* MW 7 */
+ 8624 "11110001" // /* MW 6 */
+ 8625 "00000001" // /* MW 5 */
+ 8626 "00000000" // /* MW 4 */
+ 8627 "11010000" // /* MW 3 */
+ 8628 "11001110" // /* MW 2 */
+ 8629 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6 first
+ 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8631 "00110110" // /* MW 3 */
+ 8632 "00000110" // /* MW 2 */
+ 8633 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 19
+ 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8635 "01010110" // /* MW 3 */
+ 8636 "00000110" // /* MW 2 */
+ 8637 "00000010" // /* MW 1 */
+ 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8639 "00000000" // /* MW 1 */
+ 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8641 "00000000" // /* MW 1 */
+ 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8643 "00000000" // /* MW 1 */
+ 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8645 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8647 "00110001" // /* MW 3 */
+ 8648 "00100001" // /* MW 2 */
+ 8649 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8651 "00010001" // /* MW 3 */
+ 8652 "11100110" // /* MW 2 */
+ 8653 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 16 first
+ 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8655 "00101000" // /* MW 3 */
+ 8656 "01100001" // /* MW 2 */
+ 8657 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 168 6
+ 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */
+ 8659 "00000001" // /* MW 5 */
+ 8660 "01000000" // /* MW 4 */
+ 8661 "11111000" // /* MW 3 */
+ 8662 "00010000" // /* MW 2 */
+ 8663 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8665 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8667 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8669 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8671 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8673 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14
+ 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8675 "00000001" // /* MW 3 */
+ 8676 "00100000" // /* MW 2 */
+ 8677 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 169 14 first
+ 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8679 "00000000" // /* MW 9 */
+ 8680 "00000000" // /* MW 8 */
+ 8681 "00000000" // /* MW 7 */
+ 8682 "10000000" // /* MW 6 */
+ 8683 "00010001" // /* MW 5 */
+ 8684 "00000110" // /* MW 4 */
+ 8685 "11110110" // /* MW 3 */
+ 8686 "00101100" // /* MW 2 */
+ 8687 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 171
+ 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8689 "00111001" // /* MW 3 */
+ 8690 "11110100" // /* MW 2 */
+ 8691 "00000111" // /* MW 1 */
+ 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8693 "00011001" // /* MW 3 */
+ 8694 "11111011" // /* MW 2 */
+ 8695 "00000111" // /* MW 1 */
+ 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8697 "00000000" // /* MW 1 */
+ 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8699 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8701 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8703 "11110001" // /* MW 3 */
+ 8704 "11111101" // /* MW 2 */
+ 8705 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8707 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8709 "00000000" // /* MW 3 */
+ 8710 "00101000" // /* MW 2 */
+ 8711 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8713 "10100000" // /* MW 3 */
+ 8714 "01100111" // /* MW 2 */
+ 8715 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 171
+.delay_slot
+ 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8717 "00000001" // /* MW 5 */
+ 8718 "00000000" // /* MW 4 */
+ 8719 "00000000" // /* MW 3 */
+ 8720 "11111000" // /* MW 2 */
+ 8721 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8723 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8725 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 8727 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv
+.src_ref 3 "elementwise_unary.h" 124 first
+.src_ref 3 "elementwise_unary.h" 126 24 first
+.function_start
+ 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8737 "00101110" // /* MW 3 */
+ 8738 "00011100" // /* MW 2 */
+ 8739 "00000001" // /* MW 1 */
+ 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8741 "00000000" // /* MW 1 */
+ 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8743 "00000000" // /* MW 1 */
+ 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8745 "00000000" // /* MW 1 */
+ 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8747 "00000000" // /* MW 1 */
+ 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8749 "00000000" // /* MW 1 */
+ 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8751 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 126 22 first
+ 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8753 "00101001" // /* MW 3 */
+ 8754 "00011100" // /* MW 2 */
+ 8755 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 24 first
+ 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8757 "00101110" // /* MW 3 */
+ 8758 "00011100" // /* MW 2 */
+ 8759 "00000001" // /* MW 1 */
+ 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8761 "00000000" // /* MW 1 */
+ 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8763 "00000000" // /* MW 1 */
+ 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8765 "00000000" // /* MW 1 */
+ 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8767 "00000000" // /* MW 1 */
+ 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8769 "00000000" // /* MW 1 */
+ 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8771 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 127 22
+ 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8773 "00101001" // /* MW 3 */
+ 8774 "00011100" // /* MW 2 */
+ 8775 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 24 first
+ 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8777 "00101110" // /* MW 3 */
+ 8778 "01101100" // /* MW 2 */
+ 8779 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8781 "00010010" // /* MW 3 */
+ 8782 "00000100" // /* MW 2 */
+ 8783 "00000001" // /* MW 1 */
+ 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8785 "00000000" // /* MW 1 */
+ 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8787 "00000000" // /* MW 1 */
+ 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8789 "00000000" // /* MW 1 */
+ 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8791 "00000000" // /* MW 1 */
+ 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8793 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 128 22 first
+ 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8795 "00101001" // /* MW 3 */
+ 8796 "01101100" // /* MW 2 */
+ 8797 "00001000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 113 33 first
+ 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8799 "00010111" // /* MW 3 */
+ 8800 "00000100" // /* MW 2 */
+ 8801 "00000000" // /* MW 1 */
+ 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8803 "00000000" // /* MW 1 */
+ 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8805 "00000000" // /* MW 1 */
+ 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8807 "00000000" // /* MW 1 */
+ 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8809 "00000000" // /* MW 1 */
+ 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8811 "00000000" // /* MW 1 */
+ 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8813 "00000000" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33 first
+ 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8815 "00010010" // /* MW 3 */
+ 8816 "00100100" // /* MW 2 */
+ 8817 "00000001" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 114 33
+ 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8819 "00010111" // /* MW 3 */
+ 8820 "00010100" // /* MW 2 */
+ 8821 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 130 4 first
+ 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 8823 "00000000" // /* MW 3 */
+ 8824 "00101000" // /* MW 2 */
+ 8825 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8827 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8829 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8831 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0
+ 8835 "00000000" // /* MW 1 */
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 136 first
+.src_ref 3 "elementwise_unary.h" 142 37
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 171 19
+.function_start
+ 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8849 "00010000" // /* MW 11 */
+ 8850 "10001000" // /* MW 10 */
+ 8851 "01111001" // /* MW 9 */
+ 8852 "00001000" // /* MW 8 */
+ 8853 "00000000" // /* MW 7 */
+ 8854 "00000000" // /* MW 6 */
+ 8855 "01101000" // /* MW 5 */
+ 8856 "00111010" // /* MW 4 */
+ 8857 "10000000" // /* MW 3 */
+ 8858 "11000010" // /* MW 2 */
+ 8859 "11111011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 142 78
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+ 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 8861 "00010000" // /* MW 11 */
+ 8862 "10100000" // /* MW 10 */
+ 8863 "10111001" // /* MW 9 */
+ 8864 "00001001" // /* MW 8 */
+ 8865 "00000000" // /* MW 7 */
+ 8866 "00000000" // /* MW 6 */
+ 8867 "01101000" // /* MW 5 */
+ 8868 "00111001" // /* MW 4 */
+ 8869 "00000000" // /* MW 3 */
+ 8870 "01010001" // /* MW 2 */
+ 8871 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136
+ 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8873 "11000000" // /* MW 3 */
+ 8874 "00010100" // /* MW 2 */
+ 8875 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 136 first
+ 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8877 "00010000" // /* MW 3 */
+ 8878 "01100000" // /* MW 2 */
+ 8879 "00011010" // /* MW 1 */
+.src_ref 8 "clip_impl.h" 103 16 first
+ 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8881 "01010010" // /* MW 3 */
+ 8882 "00011100" // /* MW 2 */
+ 8883 "00000010" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 142 37 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8885 "00010110" // /* MW 3 */
+ 8886 "00000000" // /* MW 2 */
+ 8887 "00000010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 8 "clip_impl.h" 104 16 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 8889 "01101000" // /* MW 5 */
+ 8890 "00111010" // /* MW 4 */
+ 8891 "01010000" // /* MW 3 */
+ 8892 "10000110" // /* MW 2 */
+ 8893 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8895 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8897 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 8899 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8901 "10110100" // /* MW 3 */
+ 8902 "00011100" // /* MW 2 */
+ 8903 "00111000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8905 "01110010" // /* MW 3 */
+ 8906 "00001001" // /* MW 2 */
+ 8907 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 142 78 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 8909 "01111000" // /* MW 9 */
+ 8910 "00110110" // /* MW 8 */
+ 8911 "01010000" // /* MW 7 */
+ 8912 "11101101" // /* MW 6 */
+ 8913 "00011000" // /* MW 5 */
+ 8914 "00000001" // /* MW 4 */
+ 8915 "01101000" // /* MW 3 */
+ 8916 "00111010" // /* MW 2 */
+ 8917 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_unary.h" 154 8 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8919 "11111110" // /* MW 3 */
+ 8920 "01111000" // /* MW 2 */
+ 8921 "00011101" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8923 "01110010" // /* MW 3 */
+ 8924 "10000101" // /* MW 2 */
+ 8925 "00011000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 8927 "10101100" // /* MW 3 */
+ 8928 "10101000" // /* MW 2 */
+ 8929 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 8931 "01100000" // /* MW 13 */
+ 8932 "00101011" // /* MW 12 */
+ 8933 "00000000" // /* MW 11 */
+ 8934 "11001111" // /* MW 10 */
+ 8935 "00000110" // /* MW 9 */
+ 8936 "00110001" // /* MW 8 */
+ 8937 "00000000" // /* MW 7 */
+ 8938 "00000000" // /* MW 6 */
+ 8939 "01101000" // /* MW 5 */
+ 8940 "00111001" // /* MW 4 */
+ 8941 "11110000" // /* MW 3 */
+ 8942 "00101100" // /* MW 2 */
+ 8943 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8945 "00000000" // /* MW 15 */
+ 8946 "00000000" // /* MW 14 */
+ 8947 "01111000" // /* MW 13 */
+ 8948 "01010110" // /* MW 12 */
+ 8949 "11011000" // /* MW 11 */
+ 8950 "00000001" // /* MW 10 */
+ 8951 "00000000" // /* MW 9 */
+ 8952 "00000000" // /* MW 8 */
+ 8953 "11010011" // /* MW 7 */
+ 8954 "00011100" // /* MW 6 */
+ 8955 "00100001" // /* MW 5 */
+ 8956 "00000000" // /* MW 4 */
+ 8957 "11110000" // /* MW 3 */
+ 8958 "00101100" // /* MW 2 */
+ 8959 "00000000" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8961 "00000000" // /* MW 15 */
+ 8962 "00000000" // /* MW 14 */
+ 8963 "01111000" // /* MW 13 */
+ 8964 "00110110" // /* MW 12 */
+ 8965 "01010000" // /* MW 11 */
+ 8966 "00000001" // /* MW 10 */
+ 8967 "00000000" // /* MW 9 */
+ 8968 "00000000" // /* MW 8 */
+ 8969 "01011011" // /* MW 7 */
+ 8970 "00000001" // /* MW 6 */
+ 8971 "00100000" // /* MW 5 */
+ 8972 "00000000" // /* MW 4 */
+ 8973 "11110000" // /* MW 3 */
+ 8974 "00101100" // /* MW 2 */
+ 8975 "00000000" // /* MW 1 */
+.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 171 19 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8977 "00000000" // /* MW 15 */
+ 8978 "00000000" // /* MW 14 */
+ 8979 "01111000" // /* MW 13 */
+ 8980 "01010110" // /* MW 12 */
+ 8981 "11010100" // /* MW 11 */
+ 8982 "00000000" // /* MW 10 */
+ 8983 "00000000" // /* MW 9 */
+ 8984 "00000000" // /* MW 8 */
+ 8985 "11010011" // /* MW 7 */
+ 8986 "00011101" // /* MW 6 */
+ 8987 "01101001" // /* MW 5 */
+ 8988 "00111010" // /* MW 4 */
+ 8989 "11110000" // /* MW 3 */
+ 8990 "00101100" // /* MW 2 */
+ 8991 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 190 19 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 8993 "00000000" // /* MW 15 */
+ 8994 "00000000" // /* MW 14 */
+ 8995 "01111000" // /* MW 13 */
+ 8996 "00110110" // /* MW 12 */
+ 8997 "10001000" // /* MW 11 */
+ 8998 "00000001" // /* MW 10 */
+ 8999 "00000000" // /* MW 9 */
+ 9000 "00000000" // /* MW 8 */
+ 9001 "01011011" // /* MW 7 */
+ 9002 "00000001" // /* MW 6 */
+ 9003 "01101000" // /* MW 5 */
+ 9004 "00111001" // /* MW 4 */
+ 9005 "11110000" // /* MW 3 */
+ 9006 "00101100" // /* MW 2 */
+ 9007 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9009 "00000000" // /* MW 15 */
+ 9010 "00000000" // /* MW 14 */
+ 9011 "01111000" // /* MW 13 */
+ 9012 "01010110" // /* MW 12 */
+ 9013 "11011000" // /* MW 11 */
+ 9014 "00000001" // /* MW 10 */
+ 9015 "00000000" // /* MW 9 */
+ 9016 "00000000" // /* MW 8 */
+ 9017 "11010011" // /* MW 7 */
+ 9018 "00011100" // /* MW 6 */
+ 9019 "00100001" // /* MW 5 */
+ 9020 "00000000" // /* MW 4 */
+ 9021 "11110000" // /* MW 3 */
+ 9022 "00101100" // /* MW 2 */
+ 9023 "00000000" // /* MW 1 */
+.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176
+.src_ref 4 "max_min.hpp" 20 104 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9025 "00000000" // /* MW 15 */
+ 9026 "00000000" // /* MW 14 */
+ 9027 "01111000" // /* MW 13 */
+ 9028 "00110110" // /* MW 12 */
+ 9029 "01010000" // /* MW 11 */
+ 9030 "00000001" // /* MW 10 */
+ 9031 "00000000" // /* MW 9 */
+ 9032 "00000000" // /* MW 8 */
+ 9033 "01011011" // /* MW 7 */
+ 9034 "00000001" // /* MW 6 */
+ 9035 "00100000" // /* MW 5 */
+ 9036 "00000000" // /* MW 4 */
+ 9037 "11110000" // /* MW 3 */
+ 9038 "00101100" // /* MW 2 */
+ 9039 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9041 "01110000" // /* MW 7 */
+ 9042 "01010110" // /* MW 6 */
+ 9043 "11010100" // /* MW 5 */
+ 9044 "00000000" // /* MW 4 */
+ 9045 "01100000" // /* MW 3 */
+ 9046 "10111010" // /* MW 2 */
+ 9047 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9049 "01101100" // /* MW 3 */
+ 9050 "00010000" // /* MW 2 */
+ 9051 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+ 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9053 "01110000" // /* MW 7 */
+ 9054 "01010110" // /* MW 6 */
+ 9055 "11011000" // /* MW 5 */
+ 9056 "00000001" // /* MW 4 */
+ 9057 "01100000" // /* MW 3 */
+ 9058 "10011010" // /* MW 2 */
+ 9059 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 3 "elementwise_unary.h" 158 4 first
+ 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9061 "11011001" // /* MW 5 */
+ 9062 "01000000" // /* MW 4 */
+ 9063 "00000101" // /* MW 3 */
+ 9064 "00000000" // /* MW 2 */
+ 9065 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 21 104 first
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9067 "01110000" // /* MW 7 */
+ 9068 "01010110" // /* MW 6 */
+ 9069 "11010100" // /* MW 5 */
+ 9070 "00000000" // /* MW 4 */
+ 9071 "01100000" // /* MW 3 */
+ 9072 "10111010" // /* MW 2 */
+ 9073 "00100011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9075 "01101100" // /* MW 3 */
+ 9076 "00010000" // /* MW 2 */
+ 9077 "00011011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 21 104 first
+.delay_slot
+ 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9079 "10101100" // /* MW 3 */
+ 9080 "10110000" // /* MW 2 */
+ 9081 "00011011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 3 "elementwise_unary.h" 195 20 first
+.delay_slot
+ 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9083 "11010011" // /* MW 3 */
+ 9084 "00011100" // /* MW 2 */
+ 9085 "00001001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_unary.h" 176 20 first
+.delay_slot
+ 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9087 "11010011" // /* MW 3 */
+ 9088 "00011101" // /* MW 2 */
+.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end
+.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0
+ 9089 "00001001" // /* MW 1 */
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 176 first
+.src_ref 7 "superkernels.cpp" 181 6
+.function_start
+ 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9105 "10000000" // /* MW 5 */
+ 9106 "11001000" // /* MW 4 */
+ 9107 "11000110" // /* MW 3 */
+ 9108 "00000111" // /* MW 2 */
+ 9109 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6 first
+ 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9111 "11000001" // /* MW 5 */
+ 9112 "10110101" // /* MW 4 */
+ 9113 "11011000" // /* MW 3 */
+ 9114 "11000010" // /* MW 2 */
+ 9115 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 176
+ 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9117 "00000001" // /* MW 5 */
+ 9118 "00000000" // /* MW 4 */
+ 9119 "00000000" // /* MW 3 */
+ 9120 "00001000" // /* MW 2 */
+ 9121 "00000000" // /* MW 1 */
+ 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9123 "01110000" // /* MW 7 */
+ 9124 "11010000" // /* MW 6 */
+ 9125 "00001011" // /* MW 5 */
+ 9126 "00000000" // /* MW 4 */
+ 9127 "10110000" // /* MW 3 */
+ 9128 "01100011" // /* MW 2 */
+ 9129 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+ 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9131 "00010001" // /* MW 9 */
+ 9132 "00101000" // /* MW 8 */
+ 9133 "00110010" // /* MW 7 */
+ 9134 "11110011" // /* MW 6 */
+ 9135 "00000001" // /* MW 5 */
+ 9136 "00000000" // /* MW 4 */
+ 9137 "10110000" // /* MW 3 */
+ 9138 "10000010" // /* MW 2 */
+ 9139 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9141 "11000000" // /* MW 3 */
+ 9142 "11010100" // /* MW 2 */
+ 9143 "00011011" // /* MW 1 */
+ 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9145 "00000000" // /* MW 1 */
+ 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9147 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 181 6
+.src_ref 7 "superkernels.cpp" 181 16
+ 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */
+ 9149 "00000001" // /* MW 5 */
+ 9150 "01000000" // /* MW 4 */
+ 9151 "00110000" // /* MW 3 */
+ 9152 "00010010" // /* MW 2 */
+ 9153 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 22 first
+.delay_slot
+ 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9155 "10010000" // /* MW 3 */
+ 9156 "01100010" // /* MW 2 */
+ 9157 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 30
+.delay_slot
+ 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9159 "11111011" // /* MW 3 */
+ 9160 "01100011" // /* MW 2 */
+ 9161 "00010100" // /* MW 1 */
+.delay_slot
+ 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9163 "00111101" // /* MW 3 */
+ 9164 "11110100" // /* MW 2 */
+ 9165 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 178 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9167 "01110000" // /* MW 7 */
+ 9168 "01100000" // /* MW 6 */
+ 9169 "00110000" // /* MW 5 */
+ 9170 "00000011" // /* MW 4 */
+ 9171 "00110000" // /* MW 3 */
+ 9172 "11000110" // /* MW 2 */
+ 9173 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4
+.src_ref 7 "superkernels.cpp" 195 2
+.delay_slot
+ 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9175 "10000000" // /* MW 5 */
+ 9176 "11001011" // /* MW 4 */
+ 9177 "11000000" // /* MW 3 */
+ 9178 "00000111" // /* MW 2 */
+ 9179 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9181 "11010000" // /* MW 5 */
+ 9182 "11001000" // /* MW 4 */
+ 9183 "11000100" // /* MW 3 */
+ 9184 "00000111" // /* MW 2 */
+ 9185 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9187 "00010000" // /* MW 9 */
+ 9188 "00110010" // /* MW 8 */
+ 9189 "00110010" // /* MW 7 */
+ 9190 "11110001" // /* MW 6 */
+ 9191 "00000001" // /* MW 5 */
+ 9192 "00000000" // /* MW 4 */
+ 9193 "11100000" // /* MW 3 */
+ 9194 "11000000" // /* MW 2 */
+ 9195 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9197 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 184 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */
+ 9199 "00000001" // /* MW 5 */
+ 9200 "00000000" // /* MW 4 */
+ 9201 "00010000" // /* MW 3 */
+ 9202 "00010001" // /* MW 2 */
+ 9203 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9205 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9207 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9209 "00110001" // /* MW 3 */
+ 9210 "00100000" // /* MW 2 */
+ 9211 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9213 "00000101" // /* MW 3 */
+ 9214 "00100000" // /* MW 2 */
+ 9215 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9217 "00000000" // /* MW 15 */
+ 9218 "00000000" // /* MW 14 */
+ 9219 "01111000" // /* MW 13 */
+ 9220 "10100101" // /* MW 12 */
+ 9221 "00000001" // /* MW 11 */
+ 9222 "00000000" // /* MW 10 */
+ 9223 "00000000" // /* MW 9 */
+ 9224 "10000000" // /* MW 8 */
+ 9225 "00010001" // /* MW 7 */
+ 9226 "00000110" // /* MW 6 */
+ 9227 "00100010" // /* MW 5 */
+ 9228 "00000000" // /* MW 4 */
+ 9229 "11110000" // /* MW 3 */
+ 9230 "00101100" // /* MW 2 */
+ 9231 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18
+.return_address
+ 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9233 "10100000" // /* MW 5 */
+ 9234 "11001000" // /* MW 4 */
+ 9235 "11000100" // /* MW 3 */
+ 9236 "00000111" // /* MW 2 */
+ 9237 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 18 first
+.src_ref 7 "superkernels.cpp" 188 43
+ 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9239 "00010000" // /* MW 9 */
+ 9240 "11100000" // /* MW 8 */
+ 9241 "00110010" // /* MW 7 */
+ 9242 "11110001" // /* MW 6 */
+ 9243 "00000001" // /* MW 5 */
+ 9244 "00000000" // /* MW 4 */
+ 9245 "11010000" // /* MW 3 */
+ 9246 "11000010" // /* MW 2 */
+ 9247 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29
+.src_ref 7 "superkernels.cpp" 188 43
+.src_ref 7 "superkernels.cpp" 195 2
+ 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9249 "00010000" // /* MW 9 */
+ 9250 "11100000" // /* MW 8 */
+ 9251 "00110010" // /* MW 7 */
+ 9252 "11110001" // /* MW 6 */
+ 9253 "00000001" // /* MW 5 */
+ 9254 "00000000" // /* MW 4 */
+ 9255 "11010000" // /* MW 3 */
+ 9256 "11000110" // /* MW 2 */
+ 9257 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 29 first
+.src_ref 7 "superkernels.cpp" 188 16
+.src_ref 7 "superkernels.cpp" 193 47
+ 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9259 "00010000" // /* MW 9 */
+ 9260 "00101010" // /* MW 8 */
+ 9261 "10110010" // /* MW 7 */
+ 9262 "11110000" // /* MW 6 */
+ 9263 "00000001" // /* MW 5 */
+ 9264 "00000000" // /* MW 4 */
+ 9265 "01010000" // /* MW 3 */
+ 9266 "11001011" // /* MW 2 */
+ 9267 "01001000" // /* MW 1 */
+ 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9269 "00000000" // /* MW 1 */
+ 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9271 "00000000" // /* MW 1 */
+ 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */
+ 9273 "00000000" // /* MW 5 */
+ 9274 "00000000" // /* MW 4 */
+ 9275 "00111000" // /* MW 3 */
+ 9276 "00010010" // /* MW 2 */
+ 9277 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13
+.delay_slot
+ 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9279 "11000000" // /* MW 5 */
+ 9280 "11001000" // /* MW 4 */
+ 9281 "11000000" // /* MW 3 */
+ 9282 "00000111" // /* MW 2 */
+ 9283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9285 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 27 first
+.delay_slot
+ 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9287 "00001111" // /* MW 3 */
+ 9288 "01100001" // /* MW 2 */
+ 9289 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 186 13 first
+.delay_slot
+ 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9291 "10100011" // /* MW 5 */
+ 9292 "00001100" // /* MW 4 */
+ 9293 "11110000" // /* MW 3 */
+ 9294 "00101100" // /* MW 2 */
+ 9295 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 188 16 first
+.delay_slot
+ 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9297 "00000000" // /* MW 15 */
+ 9298 "00000000" // /* MW 14 */
+ 9299 "01111000" // /* MW 13 */
+ 9300 "10100101" // /* MW 12 */
+ 9301 "00000001" // /* MW 11 */
+ 9302 "00000000" // /* MW 10 */
+ 9303 "00000000" // /* MW 9 */
+ 9304 "10000000" // /* MW 8 */
+ 9305 "00010001" // /* MW 7 */
+ 9306 "00000110" // /* MW 6 */
+ 9307 "00100001" // /* MW 5 */
+ 9308 "00000000" // /* MW 4 */
+ 9309 "11110000" // /* MW 3 */
+ 9310 "00101100" // /* MW 2 */
+ 9311 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 193 47
+.src_ref 7 "superkernels.cpp" 195 2
+ 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9313 "00000000" // /* MW 15 */
+ 9314 "00000000" // /* MW 14 */
+ 9315 "00010000" // /* MW 13 */
+ 9316 "00101010" // /* MW 12 */
+ 9317 "10110010" // /* MW 11 */
+ 9318 "11110000" // /* MW 10 */
+ 9319 "00000001" // /* MW 9 */
+ 9320 "00000000" // /* MW 8 */
+ 9321 "10001011" // /* MW 7 */
+ 9322 "10000000" // /* MW 6 */
+ 9323 "00100010" // /* MW 5 */
+ 9324 "00000000" // /* MW 4 */
+ 9325 "11110000" // /* MW 3 */
+ 9326 "00101100" // /* MW 2 */
+ 9327 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9329 "00000000" // /* MW 7 */
+ 9330 "11000011" // /* MW 6 */
+ 9331 "10110011" // /* MW 5 */
+ 9332 "00000011" // /* MW 4 */
+ 9333 "01100000" // /* MW 3 */
+ 9334 "10010001" // /* MW 2 */
+ 9335 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9337 "00010000" // /* MW 9 */
+ 9338 "00100000" // /* MW 8 */
+ 9339 "00110010" // /* MW 7 */
+ 9340 "11110000" // /* MW 6 */
+ 9341 "00000001" // /* MW 5 */
+ 9342 "00000000" // /* MW 4 */
+ 9343 "11010000" // /* MW 3 */
+ 9344 "11101110" // /* MW 2 */
+ 9345 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9347 "00010110" // /* MW 3 */
+ 9348 "11111110" // /* MW 2 */
+ 9349 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9351 "00110110" // /* MW 3 */
+ 9352 "11111110" // /* MW 2 */
+ 9353 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9355 "01010110" // /* MW 3 */
+ 9356 "01000110" // /* MW 2 */
+ 9357 "00000111" // /* MW 1 */
+ 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9359 "00000000" // /* MW 1 */
+ 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9361 "00000000" // /* MW 1 */
+ 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9363 "00000000" // /* MW 1 */
+ 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9365 "00000000" // /* MW 1 */
+ 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9367 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9369 "00000010" // /* MW 3 */
+ 9370 "01100001" // /* MW 2 */
+ 9371 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9373 "00010001" // /* MW 3 */
+ 9374 "00000110" // /* MW 2 */
+ 9375 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9377 "11111101" // /* MW 3 */
+ 9378 "11100000" // /* MW 2 */
+ 9379 "00010111" // /* MW 1 */
+ 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9381 "00000000" // /* MW 1 */
+ 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9383 "00000000" // /* MW 1 */
+ 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9385 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9387 "00001000" // /* MW 3 */
+ 9388 "10010011" // /* MW 2 */
+ 9389 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+ 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9391 "10000001" // /* MW 5 */
+ 9392 "10101101" // /* MW 4 */
+ 9393 "10100111" // /* MW 3 */
+ 9394 "00000000" // /* MW 2 */
+ 9395 "00000100" // /* MW 1 */
+ 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9397 "00000000" // /* MW 1 */
+ 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9399 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+ 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9401 "00110110" // /* MW 3 */
+ 9402 "00000110" // /* MW 2 */
+ 9403 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9405 "10000001" // /* MW 5 */
+ 9406 "11011101" // /* MW 4 */
+ 9407 "11011100" // /* MW 3 */
+ 9408 "11001010" // /* MW 2 */
+ 9409 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 47 first
+ 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9411 "01110110" // /* MW 3 */
+ 9412 "00000110" // /* MW 2 */
+ 9413 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9415 "10011110" // /* MW 3 */
+ 9416 "01011100" // /* MW 2 */
+ 9417 "00000111" // /* MW 1 */
+ 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9419 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 195 2 first
+.no_stack_arguments
+ 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */
+ 9421 "00000001" // /* MW 5 */
+ 9422 "00000000" // /* MW 4 */
+ 9423 "01001000" // /* MW 3 */
+ 9424 "00010001" // /* MW 2 */
+ 9425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9427 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2 first
+.delay_slot
+ 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9429 "00000111" // /* MW 3 */
+ 9430 "01100010" // /* MW 2 */
+ 9431 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 192 2
+.delay_slot
+ 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9433 "00110001" // /* MW 3 */
+ 9434 "00000110" // /* MW 2 */
+ 9435 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45 first
+.delay_slot
+ 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9437 "00001101" // /* MW 3 */
+ 9438 "11100001" // /* MW 2 */
+ 9439 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 193 45
+.delay_slot
+ 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 9441 "00000000" // /* MW 15 */
+ 9442 "00000000" // /* MW 14 */
+ 9443 "10101000" // /* MW 13 */
+ 9444 "10100000" // /* MW 12 */
+ 9445 "00110100" // /* MW 11 */
+ 9446 "00000000" // /* MW 10 */
+ 9447 "00000000" // /* MW 9 */
+ 9448 "00000000" // /* MW 8 */
+ 9449 "01011011" // /* MW 7 */
+ 9450 "00000001" // /* MW 6 */
+ 9451 "00100000" // /* MW 5 */
+ 9452 "00000000" // /* MW 4 */
+ 9453 "11110000" // /* MW 3 */
+ 9454 "00101100" // /* MW 2 */
+ 9455 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+.src_ref 7 "superkernels.cpp" 198 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9457 "00010000" // /* MW 9 */
+ 9458 "00100000" // /* MW 8 */
+ 9459 "00110010" // /* MW 7 */
+ 9460 "11110011" // /* MW 6 */
+ 9461 "00000001" // /* MW 5 */
+ 9462 "00000000" // /* MW 4 */
+ 9463 "11010000" // /* MW 3 */
+ 9464 "11000110" // /* MW 2 */
+ 9465 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9467 "00000101" // /* MW 3 */
+ 9468 "00100000" // /* MW 2 */
+ 9469 "00010000" // /* MW 1 */
+ 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9471 "00000000" // /* MW 1 */
+ 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9473 "00000000" // /* MW 1 */
+ 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9475 "00000000" // /* MW 1 */
+ 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9477 "00000000" // /* MW 1 */
+ 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9479 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9481 "00001000" // /* MW 3 */
+ 9482 "01010001" // /* MW 2 */
+ 9483 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9485 "00010000" // /* MW 9 */
+ 9486 "00110000" // /* MW 8 */
+ 9487 "00110010" // /* MW 7 */
+ 9488 "11110001" // /* MW 6 */
+ 9489 "00000001" // /* MW 5 */
+ 9490 "00000000" // /* MW 4 */
+ 9491 "11010000" // /* MW 3 */
+ 9492 "11001110" // /* MW 2 */
+ 9493 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6 first
+ 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9495 "00110110" // /* MW 3 */
+ 9496 "00000110" // /* MW 2 */
+ 9497 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 19
+ 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9499 "01010110" // /* MW 3 */
+ 9500 "00000110" // /* MW 2 */
+ 9501 "00000010" // /* MW 1 */
+ 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9503 "00000000" // /* MW 1 */
+ 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9505 "00000000" // /* MW 1 */
+ 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9507 "00000000" // /* MW 1 */
+ 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9509 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9511 "00110001" // /* MW 3 */
+ 9512 "00100001" // /* MW 2 */
+ 9513 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9515 "00010001" // /* MW 3 */
+ 9516 "11100110" // /* MW 2 */
+ 9517 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 16 first
+ 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9519 "00101000" // /* MW 3 */
+ 9520 "01100001" // /* MW 2 */
+ 9521 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 197 6
+ 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */
+ 9523 "00000001" // /* MW 5 */
+ 9524 "01000000" // /* MW 4 */
+ 9525 "10101000" // /* MW 3 */
+ 9526 "00010010" // /* MW 2 */
+ 9527 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9529 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9531 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9533 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9535 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9537 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14
+ 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9539 "00000001" // /* MW 3 */
+ 9540 "00100000" // /* MW 2 */
+ 9541 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 198 14 first
+ 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9543 "00000000" // /* MW 9 */
+ 9544 "00000000" // /* MW 8 */
+ 9545 "00000000" // /* MW 7 */
+ 9546 "10000000" // /* MW 6 */
+ 9547 "00010001" // /* MW 5 */
+ 9548 "00000110" // /* MW 4 */
+ 9549 "11110110" // /* MW 3 */
+ 9550 "00101100" // /* MW 2 */
+ 9551 "00000000" // /* MW 1 */
+.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 200
+ 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9553 "00111001" // /* MW 3 */
+ 9554 "11110100" // /* MW 2 */
+ 9555 "00000111" // /* MW 1 */
+ 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9557 "00011001" // /* MW 3 */
+ 9558 "11111011" // /* MW 2 */
+ 9559 "00000111" // /* MW 1 */
+ 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9561 "00000000" // /* MW 1 */
+ 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9563 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9565 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9567 "11110001" // /* MW 3 */
+ 9568 "11111101" // /* MW 2 */
+ 9569 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9571 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9573 "00000000" // /* MW 3 */
+ 9574 "00101000" // /* MW 2 */
+ 9575 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9577 "10100000" // /* MW 3 */
+ 9578 "01100111" // /* MW 2 */
+ 9579 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 200
+.delay_slot
+ 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9581 "00000001" // /* MW 5 */
+ 9582 "00000000" // /* MW 4 */
+ 9583 "00000000" // /* MW 3 */
+ 9584 "11111000" // /* MW 2 */
+ 9585 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9587 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9589 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 9591 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9601 "01011000" // /* MW 9 */
+ 9602 "00000000" // /* MW 8 */
+ 9603 "00001000" // /* MW 7 */
+ 9604 "00001011" // /* MW 6 */
+ 9605 "00100000" // /* MW 5 */
+ 9606 "00001000" // /* MW 4 */
+ 9607 "11010000" // /* MW 3 */
+ 9608 "10000101" // /* MW 2 */
+ 9609 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9611 "00000001" // /* MW 3 */
+ 9612 "10000000" // /* MW 2 */
+ 9613 "00010111" // /* MW 1 */
+ 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9615 "00000000" // /* MW 1 */
+ 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9617 "00000000" // /* MW 1 */
+ 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9619 "00000000" // /* MW 1 */
+ 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9621 "00000000" // /* MW 1 */
+ 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9623 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9625 "00101001" // /* MW 3 */
+ 9626 "00011100" // /* MW 2 */
+ 9627 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9629 "00101110" // /* MW 3 */
+ 9630 "00011100" // /* MW 2 */
+ 9631 "00000001" // /* MW 1 */
+ 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9633 "00000000" // /* MW 1 */
+ 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9635 "00000000" // /* MW 1 */
+ 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9637 "00000000" // /* MW 1 */
+ 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9639 "00000000" // /* MW 1 */
+ 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9641 "00000000" // /* MW 1 */
+ 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9643 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9645 "00101001" // /* MW 3 */
+ 9646 "00011100" // /* MW 2 */
+ 9647 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9649 "00101110" // /* MW 3 */
+ 9650 "00000100" // /* MW 2 */
+ 9651 "00000001" // /* MW 1 */
+ 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9653 "00000000" // /* MW 1 */
+ 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9655 "00000000" // /* MW 1 */
+ 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9657 "00000000" // /* MW 1 */
+ 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9659 "00000000" // /* MW 1 */
+ 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9661 "00000000" // /* MW 1 */
+ 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9663 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9665 "00101001" // /* MW 3 */
+ 9666 "00011100" // /* MW 2 */
+ 9667 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9669 "01110110" // /* MW 3 */
+ 9670 "00010100" // /* MW 2 */
+ 9671 "00000001" // /* MW 1 */
+ 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9673 "00000000" // /* MW 1 */
+ 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9675 "00000000" // /* MW 1 */
+ 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9677 "00000000" // /* MW 1 */
+ 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9679 "00000000" // /* MW 1 */
+ 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9681 "00000000" // /* MW 1 */
+ 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9683 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9685 "01110001" // /* MW 3 */
+ 9686 "01001100" // /* MW 2 */
+ 9687 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9689 "00010111" // /* MW 3 */
+ 9690 "00000100" // /* MW 2 */
+ 9691 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9693 "00000000" // /* MW 3 */
+ 9694 "00101000" // /* MW 2 */
+ 9695 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9697 "00000000" // /* MW 5 */
+ 9698 "10111110" // /* MW 4 */
+ 9699 "11110000" // /* MW 3 */
+ 9700 "00000000" // /* MW 2 */
+ 9701 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9703 "00010100" // /* MW 3 */
+ 9704 "11000010" // /* MW 2 */
+ 9705 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9707 "00100111" // /* MW 3 */
+ 9708 "01110110" // /* MW 2 */
+ 9709 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9711 "10000010" // /* MW 3 */
+ 9712 "00000001" // /* MW 2 */
+ 9713 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9715 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9729 "00000001" // /* MW 5 */
+ 9730 "00000000" // /* MW 4 */
+ 9731 "00000000" // /* MW 3 */
+ 9732 "00001000" // /* MW 2 */
+ 9733 "00000000" // /* MW 1 */
+ 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9735 "00111101" // /* MW 3 */
+ 9736 "11111000" // /* MW 2 */
+ 9737 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */
+ 9739 "00000001" // /* MW 5 */
+ 9740 "00000000" // /* MW 4 */
+ 9741 "11000000" // /* MW 3 */
+ 9742 "00010010" // /* MW 2 */
+ 9743 "00000000" // /* MW 1 */
+.delay_slot
+ 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9745 "10011101" // /* MW 3 */
+ 9746 "11111111" // /* MW 2 */
+ 9747 "00001111" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+ 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9749 "11000000" // /* MW 3 */
+ 9750 "01100000" // /* MW 2 */
+ 9751 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9753 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9755 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9757 "01100111" // /* MW 3 */
+ 9758 "00000001" // /* MW 2 */
+ 9759 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.return_address
+ 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9761 "00111001" // /* MW 3 */
+ 9762 "11111000" // /* MW 2 */
+ 9763 "00000111" // /* MW 1 */
+ 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9765 "00000000" // /* MW 1 */
+ 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9767 "00000000" // /* MW 1 */
+ 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9769 "00000000" // /* MW 1 */
+ 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9771 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9773 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9775 "10011001" // /* MW 3 */
+ 9776 "11111111" // /* MW 2 */
+ 9777 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 9779 "00000000" // /* MW 3 */
+ 9780 "00101000" // /* MW 2 */
+ 9781 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9783 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9785 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9787 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 193 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9789 "00001001" // /* MW 3 */
+ 9790 "00100000" // /* MW 2 */
+ 9791 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "mul_impl.h" 193 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9793 "01110001" // /* MW 9 */
+ 9794 "00000000" // /* MW 8 */
+ 9795 "00000000" // /* MW 7 */
+ 9796 "00000000" // /* MW 6 */
+ 9797 "11111110" // /* MW 5 */
+ 9798 "00111111" // /* MW 4 */
+ 9799 "00110000" // /* MW 3 */
+ 9800 "11000010" // /* MW 2 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 9801 "11101000" // /* MW 1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0
+.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE
+.src_ref 3 "elementwise_binary_shared.h" 107 first
+.src_ref 3 "elementwise_binary_shared.h" 119 37
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.function_start
+ 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9809 "11000000" // /* MW 3 */
+ 9810 "00010110" // /* MW 2 */
+ 9811 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+ 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9813 "00000111" // /* MW 3 */
+ 9814 "01100000" // /* MW 2 */
+ 9815 "00011011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 122 22 first
+ 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9817 "01010010" // /* MW 3 */
+ 9818 "00011100" // /* MW 2 */
+ 9819 "00000011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 15 first
+ 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9821 "10010110" // /* MW 3 */
+ 9822 "00000100" // /* MW 2 */
+ 9823 "00000011" // /* MW 1 */
+ 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9825 "00000000" // /* MW 1 */
+ 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9827 "00000000" // /* MW 1 */
+ 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9829 "00000000" // /* MW 1 */
+ 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9831 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9833 "00001001" // /* MW 3 */
+ 9834 "00000110" // /* MW 2 */
+ 9835 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 107
+ 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9837 "00000001" // /* MW 5 */
+ 9838 "00000000" // /* MW 4 */
+ 9839 "00000000" // /* MW 3 */
+ 9840 "00010000" // /* MW 2 */
+ 9841 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 124 26
+ 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9843 "01001100" // /* MW 3 */
+ 9844 "11000110" // /* MW 2 */
+ 9845 "00010000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25
+.src_ref 3 "elementwise_binary_shared.h" 124 8
+ 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */
+ 9847 "01100000" // /* MW 9 */
+ 9848 "00000000" // /* MW 8 */
+ 9849 "00010000" // /* MW 7 */
+ 9850 "11100010" // /* MW 6 */
+ 9851 "00000100" // /* MW 5 */
+ 9852 "00000110" // /* MW 4 */
+ 9853 "00000000" // /* MW 3 */
+ 9854 "00000001" // /* MW 2 */
+ 9855 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 65 25 first
+.delay_slot
+ 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9857 "01110010" // /* MW 3 */
+ 9858 "00000101" // /* MW 2 */
+ 9859 "00011000" // /* MW 1 */
+.delay_slot
+ 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9861 "11000000" // /* MW 3 */
+ 9862 "01011110" // /* MW 2 */
+ 9863 "00011000" // /* MW 1 */
+.delay_slot
+ 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9865 "11100000" // /* MW 3 */
+ 9866 "01100101" // /* MW 2 */
+ 9867 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9869 "10000001" // /* MW 5 */
+ 9870 "11011101" // /* MW 4 */
+ 9871 "00001010" // /* MW 3 */
+ 9872 "11110010" // /* MW 2 */
+ 9873 "11111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 119 37 first
+.delay_slot
+ 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9875 "00010011" // /* MW 3 */
+ 9876 "00000100" // /* MW 2 */
+ 9877 "00001111" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 126 34
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+.src_ref 3 "elementwise_binary_shared.h" 131 19
+ 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 9879 "01110010" // /* MW 9 */
+ 9880 "10111001" // /* MW 8 */
+ 9881 "00000100" // /* MW 7 */
+ 9882 "00000000" // /* MW 6 */
+ 9883 "00001011" // /* MW 5 */
+ 9884 "10000000" // /* MW 4 */
+ 9885 "10000100" // /* MW 3 */
+ 9886 "10000010" // /* MW 2 */
+ 9887 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 126 34 first
+.src_ref 3 "elementwise_binary_shared.h" 131 19 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9889 "00000001" // /* MW 5 */
+ 9890 "00000001" // /* MW 4 */
+ 9891 "01010100" // /* MW 3 */
+ 9892 "00000001" // /* MW 2 */
+ 9893 "10000000" // /* MW 1 */
+ 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9895 "00000000" // /* MW 1 */
+ 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9897 "00000000" // /* MW 1 */
+ 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9899 "00000000" // /* MW 1 */
+ 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9901 "00000000" // /* MW 1 */
+ 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9903 "00000000" // /* MW 1 */
+ 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9905 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 131 12
+.src_ref 3 "elementwise_binary_shared.h" 131 35
+ 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */
+ 9907 "00000001" // /* MW 5 */
+ 9908 "01000000" // /* MW 4 */
+ 9909 "01110000" // /* MW 3 */
+ 9910 "00010011" // /* MW 2 */
+ 9911 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9913 "00000000" // /* MW 3 */
+ 9914 "00000000" // /* MW 2 */
+ 9915 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9917 "11010000" // /* MW 5 */
+ 9918 "11001000" // /* MW 4 */
+ 9919 "11001000" // /* MW 3 */
+ 9920 "00000111" // /* MW 2 */
+ 9921 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9927 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */
+ 9929 "00100000" // /* MW 9 */
+ 9930 "00000000" // /* MW 8 */
+ 9931 "00000000" // /* MW 7 */
+ 9932 "11011110" // /* MW 6 */
+ 9933 "00000100" // /* MW 5 */
+ 9934 "00000000" // /* MW 4 */
+ 9935 "10000000" // /* MW 3 */
+ 9936 "00000100" // /* MW 2 */
+ 9937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9945 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.delay_slot
+ 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 9947 "00100110" // /* MW 5 */
+ 9948 "00001000" // /* MW 4 */
+ 9949 "11110000" // /* MW 3 */
+ 9950 "00101100" // /* MW 2 */
+ 9951 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+ 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 9953 "10000000" // /* MW 3 */
+ 9954 "00000000" // /* MW 2 */
+ 9955 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+ 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9957 "01010000" // /* MW 11 */
+ 9958 "00000000" // /* MW 10 */
+ 9959 "00000000" // /* MW 9 */
+ 9960 "00000001" // /* MW 8 */
+ 9961 "00010011" // /* MW 7 */
+ 9962 "00000100" // /* MW 6 */
+ 9963 "00100001" // /* MW 5 */
+ 9964 "00000000" // /* MW 4 */
+ 9965 "11110000" // /* MW 3 */
+ 9966 "00101100" // /* MW 2 */
+ 9967 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160
+ 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */
+ 9969 "00000000" // /* MW 5 */
+ 9970 "00000000" // /* MW 4 */
+ 9971 "11001000" // /* MW 3 */
+ 9972 "00010011" // /* MW 2 */
+ 9973 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.delay_slot
+ 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 9975 "01110000" // /* MW 7 */
+ 9976 "01100000" // /* MW 6 */
+ 9977 "10110000" // /* MW 5 */
+ 9978 "00000011" // /* MW 4 */
+ 9979 "01100000" // /* MW 3 */
+ 9980 "10010001" // /* MW 2 */
+ 9981 "00010011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9983 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9985 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 9987 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 9989 "10000001" // /* MW 11 */
+ 9990 "10101101" // /* MW 10 */
+ 9991 "00000000" // /* MW 9 */
+ 9992 "00000000" // /* MW 8 */
+ 9993 "00000000" // /* MW 7 */
+ 9994 "00000000" // /* MW 6 */
+ 9995 "00100000" // /* MW 5 */
+ 9996 "00000000" // /* MW 4 */
+ 9997 "11110000" // /* MW 3 */
+ 9998 "00101100" // /* MW 2 */
+ 9999 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192
+.src_ref 3 "elementwise_binary_shared.h" 150 97
+ 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10001 "00001101" // /* MW 3 */
+ 10002 "00000100" // /* MW 2 */
+ 10003 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 97 first
+ 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10005 "01000111" // /* MW 3 */
+ 10006 "10000100" // /* MW 2 */
+ 10007 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */
+ 10009 "00000001" // /* MW 5 */
+ 10010 "01000000" // /* MW 4 */
+ 10011 "10100000" // /* MW 3 */
+ 10012 "00010011" // /* MW 2 */
+ 10013 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.delay_slot
+ 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10015 "00000000" // /* MW 5 */
+ 10016 "00100000" // /* MW 4 */
+ 10017 "00000000" // /* MW 3 */
+ 10018 "10000000" // /* MW 2 */
+ 10019 "00111111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.delay_slot
+ 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10021 "11010000" // /* MW 5 */
+ 10022 "11001000" // /* MW 4 */
+ 10023 "11001000" // /* MW 3 */
+ 10024 "00000111" // /* MW 2 */
+ 10025 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10027 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10029 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10033 "00000000" // /* MW 15 */
+ 10034 "00000000" // /* MW 14 */
+ 10035 "00010000" // /* MW 13 */
+ 10036 "00000000" // /* MW 12 */
+ 10037 "00001000" // /* MW 11 */
+ 10038 "00000000" // /* MW 10 */
+ 10039 "11100000" // /* MW 9 */
+ 10040 "00101111" // /* MW 8 */
+ 10041 "01011011" // /* MW 7 */
+ 10042 "00000001" // /* MW 6 */
+ 10043 "00100000" // /* MW 5 */
+ 10044 "00000000" // /* MW 4 */
+ 10045 "11110000" // /* MW 3 */
+ 10046 "00101100" // /* MW 2 */
+ 10047 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10049 "01011000" // /* MW 9 */
+ 10050 "10111110" // /* MW 8 */
+ 10051 "01000111" // /* MW 7 */
+ 10052 "00000000" // /* MW 6 */
+ 10053 "11010010" // /* MW 5 */
+ 10054 "00000010" // /* MW 4 */
+ 10055 "01010000" // /* MW 3 */
+ 10056 "10000000" // /* MW 2 */
+ 10057 "10000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 3 "elementwise_binary_shared.h" 173 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10059 "10000000" // /* MW 3 */
+ 10060 "00000000" // /* MW 2 */
+ 10061 "00011000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 169 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10063 "00000000" // /* MW 3 */
+ 10064 "00000000" // /* MW 2 */
+ 10065 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 171 16
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10067 "10000000" // /* MW 3 */
+ 10068 "00000000" // /* MW 2 */
+ 10069 "00011010" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10073 "00010001" // /* MW 3 */
+ 10074 "00000000" // /* MW 2 */
+ 10075 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10077 "00100101" // /* MW 5 */
+ 10078 "00000001" // /* MW 4 */
+ 10079 "11100010" // /* MW 3 */
+ 10080 "00000010" // /* MW 2 */
+ 10081 "10100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10083 "10000000" // /* MW 3 */
+ 10084 "00111010" // /* MW 2 */
+ 10085 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10087 "10010110" // /* MW 3 */
+ 10088 "01000000" // /* MW 2 */
+ 10089 "00001000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10093 "00000001" // /* MW 3 */
+ 10094 "00000001" // /* MW 2 */
+ 10095 "00011000" // /* MW 1 */
+ 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10097 "00000000" // /* MW 1 */
+ 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 150 78
+ 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10101 "00010010" // /* MW 3 */
+ 10102 "00000000" // /* MW 2 */
+ 10103 "00000101" // /* MW 1 */
+ 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10105 "00000000" // /* MW 1 */
+ 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10107 "00000000" // /* MW 1 */
+ 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10109 "00000000" // /* MW 1 */
+ 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10111 "00000000" // /* MW 1 */
+ 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10113 "00000000" // /* MW 1 */
+ 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10115 "00000000" // /* MW 1 */
+.src_ref 4 "broadcast.hpp" 56 25 first
+ 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10117 "01110010" // /* MW 3 */
+ 10118 "00000001" // /* MW 2 */
+ 10119 "00011000" // /* MW 1 */
+ 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10121 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+ 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10123 "01100110" // /* MW 5 */
+ 10124 "11111000" // /* MW 4 */
+ 10125 "11111111" // /* MW 3 */
+ 10126 "00101100" // /* MW 2 */
+ 10127 "00000000" // /* MW 1 */
+.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 166 4 first
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+ 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10129 "00010000" // /* MW 11 */
+ 10130 "00000000" // /* MW 10 */
+ 10131 "01111100" // /* MW 9 */
+ 10132 "00001000" // /* MW 8 */
+ 10133 "00000000" // /* MW 7 */
+ 10134 "00000000" // /* MW 6 */
+ 10135 "11101000" // /* MW 5 */
+ 10136 "01010000" // /* MW 4 */
+ 10137 "11011110" // /* MW 3 */
+ 10138 "10001010" // /* MW 2 */
+ 10139 "01111000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 166 31
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10141 "00010000" // /* MW 11 */
+ 10142 "00011000" // /* MW 10 */
+ 10143 "10111100" // /* MW 9 */
+ 10144 "00001001" // /* MW 8 */
+ 10145 "00000000" // /* MW 7 */
+ 10146 "00000000" // /* MW 6 */
+ 10147 "01101000" // /* MW 5 */
+ 10148 "10010000" // /* MW 4 */
+ 10149 "00000010" // /* MW 3 */
+ 10150 "01100011" // /* MW 2 */
+ 10151 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 177 44
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10153 "11110001" // /* MW 7 */
+ 10154 "00000000" // /* MW 6 */
+ 10155 "11101000" // /* MW 5 */
+ 10156 "01010000" // /* MW 4 */
+ 10157 "01111110" // /* MW 3 */
+ 10158 "00000101" // /* MW 2 */
+ 10159 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10161 "01101000" // /* MW 5 */
+ 10162 "10010000" // /* MW 4 */
+ 10163 "01010010" // /* MW 3 */
+ 10164 "10010000" // /* MW 2 */
+ 10165 "10000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10167 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10169 "00101011" // /* MW 3 */
+ 10170 "00001000" // /* MW 2 */
+ 10171 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10173 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 31 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10175 "00111101" // /* MW 3 */
+ 10176 "10000100" // /* MW 2 */
+ 10177 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 166 4
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10179 "00000001" // /* MW 7 */
+ 10180 "00000010" // /* MW 6 */
+ 10181 "00000001" // /* MW 5 */
+ 10182 "10000110" // /* MW 4 */
+ 10183 "01111110" // /* MW 3 */
+ 10184 "01110001" // /* MW 2 */
+ 10185 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10187 "11101000" // /* MW 5 */
+ 10188 "01010000" // /* MW 4 */
+ 10189 "01111110" // /* MW 3 */
+ 10190 "00000011" // /* MW 2 */
+ 10191 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10193 "00000000" // /* MW 15 */
+ 10194 "00000000" // /* MW 14 */
+ 10195 "01111000" // /* MW 13 */
+ 10196 "10100101" // /* MW 12 */
+ 10197 "00000001" // /* MW 11 */
+ 10198 "00000000" // /* MW 10 */
+ 10199 "11010100" // /* MW 9 */
+ 10200 "00001001" // /* MW 8 */
+ 10201 "01011011" // /* MW 7 */
+ 10202 "00000001" // /* MW 6 */
+ 10203 "00100000" // /* MW 5 */
+ 10204 "00000000" // /* MW 4 */
+ 10205 "01110000" // /* MW 3 */
+ 10206 "00000101" // /* MW 2 */
+ 10207 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10209 "00000000" // /* MW 15 */
+ 10210 "00000000" // /* MW 14 */
+ 10211 "01111000" // /* MW 13 */
+ 10212 "10100101" // /* MW 12 */
+ 10213 "00000001" // /* MW 11 */
+ 10214 "00000000" // /* MW 10 */
+ 10215 "00000000" // /* MW 9 */
+ 10216 "00000000" // /* MW 8 */
+ 10217 "01011011" // /* MW 7 */
+ 10218 "00000001" // /* MW 6 */
+ 10219 "00100000" // /* MW 5 */
+ 10220 "00000000" // /* MW 4 */
+ 10221 "11110000" // /* MW 3 */
+ 10222 "00101100" // /* MW 2 */
+ 10223 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10225 "00010000" // /* MW 15 */
+ 10226 "00001000" // /* MW 14 */
+ 10227 "01111000" // /* MW 13 */
+ 10228 "10100101" // /* MW 12 */
+ 10229 "00000001" // /* MW 11 */
+ 10230 "00000000" // /* MW 10 */
+ 10231 "00000000" // /* MW 9 */
+ 10232 "00000000" // /* MW 8 */
+ 10233 "01011011" // /* MW 7 */
+ 10234 "00000001" // /* MW 6 */
+ 10235 "00100000" // /* MW 5 */
+ 10236 "00000000" // /* MW 4 */
+ 10237 "11110000" // /* MW 3 */
+ 10238 "00101100" // /* MW 2 */
+ 10239 "00000000" // /* MW 1 */
+.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary_shared.h" 169 16 first
+.src_ref 3 "elementwise_binary_shared.h" 171 16 first
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10241 "00000000" // /* MW 15 */
+ 10242 "00000000" // /* MW 14 */
+ 10243 "01111000" // /* MW 13 */
+ 10244 "10100101" // /* MW 12 */
+ 10245 "00000001" // /* MW 11 */
+ 10246 "00000000" // /* MW 10 */
+ 10247 "00000000" // /* MW 9 */
+ 10248 "00000000" // /* MW 8 */
+ 10249 "01011011" // /* MW 7 */
+ 10250 "00000001" // /* MW 6 */
+ 10251 "11101000" // /* MW 5 */
+ 10252 "01010000" // /* MW 4 */
+ 10253 "01111110" // /* MW 3 */
+ 10254 "00000011" // /* MW 2 */
+ 10255 "00101001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 173 18 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10257 "00000000" // /* MW 15 */
+ 10258 "00000000" // /* MW 14 */
+ 10259 "01111000" // /* MW 13 */
+ 10260 "10100101" // /* MW 12 */
+ 10261 "00000001" // /* MW 11 */
+ 10262 "00000000" // /* MW 10 */
+ 10263 "00000000" // /* MW 9 */
+ 10264 "00000000" // /* MW 8 */
+ 10265 "10100011" // /* MW 7 */
+ 10266 "00011100" // /* MW 6 */
+ 10267 "00100010" // /* MW 5 */
+ 10268 "00000000" // /* MW 4 */
+ 10269 "01110000" // /* MW 3 */
+ 10270 "00000101" // /* MW 2 */
+ 10271 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10273 "00000000" // /* MW 15 */
+ 10274 "00000000" // /* MW 14 */
+ 10275 "01111000" // /* MW 13 */
+ 10276 "10100101" // /* MW 12 */
+ 10277 "00000001" // /* MW 11 */
+ 10278 "00000000" // /* MW 10 */
+ 10279 "00000000" // /* MW 9 */
+ 10280 "00000000" // /* MW 8 */
+ 10281 "01011011" // /* MW 7 */
+ 10282 "00000001" // /* MW 6 */
+ 10283 "00100000" // /* MW 5 */
+ 10284 "00000000" // /* MW 4 */
+ 10285 "11110000" // /* MW 3 */
+ 10286 "00101100" // /* MW 2 */
+ 10287 "00000000" // /* MW 1 */
+.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10289 "00010000" // /* MW 15 */
+ 10290 "00001000" // /* MW 14 */
+ 10291 "01111000" // /* MW 13 */
+ 10292 "10100101" // /* MW 12 */
+ 10293 "00000001" // /* MW 11 */
+ 10294 "00000000" // /* MW 10 */
+ 10295 "00000000" // /* MW 9 */
+ 10296 "00000000" // /* MW 8 */
+ 10297 "01011011" // /* MW 7 */
+ 10298 "00000001" // /* MW 6 */
+ 10299 "00100000" // /* MW 5 */
+ 10300 "00000000" // /* MW 4 */
+ 10301 "11110000" // /* MW 3 */
+ 10302 "00101100" // /* MW 2 */
+ 10303 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10305 "00000001" // /* MW 5 */
+ 10306 "00000000" // /* MW 4 */
+ 10307 "00000000" // /* MW 3 */
+ 10308 "11110000" // /* MW 2 */
+ 10309 "11111111" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10311 "10100011" // /* MW 3 */
+ 10312 "00011100" // /* MW 2 */
+ 10313 "00001010" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10315 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 177 44 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10317 "00000001" // /* MW 3 */
+ 10318 "00000010" // /* MW 2 */
+ 10319 "00000001" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10321 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 187 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10323 "00000000" // /* MW 3 */
+ 10324 "00101000" // /* MW 2 */
+ 10325 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary_shared.h" 185 18 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10327 "10100011" // /* MW 3 */
+ 10328 "00011100" // /* MW 2 */
+ 10329 "00001010" // /* MW 1 */
+.delay_slot
+ 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10331 "10100000" // /* MW 3 */
+ 10332 "01100000" // /* MW 2 */
+ 10333 "00011111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10335 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary_shared.h" 185 18
+.delay_slot
+ 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10337 "10100011" // /* MW 3 */
+ 10338 "00011100" // /* MW 2 */
+ 10339 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end
+.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0
+ 10341 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 3 "elementwise_binary_shared.h" 237 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.function_start
+ 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10353 "01110010" // /* MW 9 */
+ 10354 "11110000" // /* MW 8 */
+ 10355 "01100000" // /* MW 7 */
+ 10356 "00000000" // /* MW 6 */
+ 10357 "10001011" // /* MW 5 */
+ 10358 "10001000" // /* MW 4 */
+ 10359 "10000011" // /* MW 3 */
+ 10360 "10000010" // /* MW 2 */
+ 10361 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13 first
+.src_ref 3 "elementwise_binary_shared.h" 244 19 first
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 247 12
+ 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10363 "10000001" // /* MW 5 */
+ 10364 "11000101" // /* MW 4 */
+ 10365 "01010100" // /* MW 3 */
+ 10366 "00000001" // /* MW 2 */
+ 10367 "01000000" // /* MW 1 */
+ 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10369 "00000000" // /* MW 1 */
+ 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10371 "00000000" // /* MW 1 */
+ 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10373 "00000000" // /* MW 1 */
+ 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10375 "00000000" // /* MW 1 */
+ 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10377 "00000000" // /* MW 1 */
+ 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10379 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 244 12
+.src_ref 3 "elementwise_binary_shared.h" 244 35
+ 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */
+ 10381 "00000001" // /* MW 5 */
+ 10382 "00000000" // /* MW 4 */
+ 10383 "01101000" // /* MW 3 */
+ 10384 "00010100" // /* MW 2 */
+ 10385 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 237
+.delay_slot
+ 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10387 "00000001" // /* MW 5 */
+ 10388 "00000000" // /* MW 4 */
+ 10389 "00000000" // /* MW 3 */
+ 10390 "00001000" // /* MW 2 */
+ 10391 "00000000" // /* MW 1 */
+.delay_slot
+ 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10393 "11100000" // /* MW 3 */
+ 10394 "01010101" // /* MW 2 */
+ 10395 "00011000" // /* MW 1 */
+.delay_slot
+ 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10397 "11100000" // /* MW 3 */
+ 10398 "01100000" // /* MW 2 */
+ 10399 "00011001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 538 13
+.src_ref 4 "vector_native_types.hpp" 374 137 first
+.delay_slot
+ 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10401 "00101011" // /* MW 3 */
+ 10402 "00000111" // /* MW 2 */
+ 10403 "00001001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10405 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 247 12 first
+.no_stack_arguments
+ 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10407 "00000001" // /* MW 5 */
+ 10408 "00000000" // /* MW 4 */
+ 10409 "00101000" // /* MW 3 */
+ 10410 "00010011" // /* MW 2 */
+ 10411 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10413 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10419 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10421 "10000001" // /* MW 11 */
+ 10422 "10101101" // /* MW 10 */
+ 10423 "00000000" // /* MW 9 */
+ 10424 "00000000" // /* MW 8 */
+ 10425 "00000000" // /* MW 7 */
+ 10426 "00000000" // /* MW 6 */
+ 10427 "00100000" // /* MW 5 */
+ 10428 "00000000" // /* MW 4 */
+ 10429 "11110000" // /* MW 3 */
+ 10430 "00101100" // /* MW 2 */
+ 10431 "00000000" // /* MW 1 */
+.return_address
+ 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */
+ 10433 "00000000" // /* MW 5 */
+ 10434 "00000000" // /* MW 4 */
+ 10435 "01111000" // /* MW 3 */
+ 10436 "00010100" // /* MW 2 */
+ 10437 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10439 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10441 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10443 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10447 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96
+.src_ref 3 "elementwise_binary_shared.h" 245 12 first
+.no_stack_arguments
+ 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 10449 "00000001" // /* MW 5 */
+ 10450 "00000000" // /* MW 4 */
+ 10451 "00101000" // /* MW 3 */
+ 10452 "00010011" // /* MW 2 */
+ 10453 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.src_ref 3 "elementwise_binary_shared.h" 245 12
+.delay_slot
+ 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10455 "01110000" // /* MW 7 */
+ 10456 "01100000" // /* MW 6 */
+ 10457 "10110000" // /* MW 5 */
+ 10458 "00000000" // /* MW 4 */
+ 10459 "01100000" // /* MW 3 */
+ 10460 "10010001" // /* MW 2 */
+ 10461 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10463 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10465 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10467 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 10469 "10000001" // /* MW 11 */
+ 10470 "10101101" // /* MW 10 */
+ 10471 "00000000" // /* MW 9 */
+ 10472 "00000000" // /* MW 8 */
+ 10473 "00000000" // /* MW 7 */
+ 10474 "00000000" // /* MW 6 */
+ 10475 "00100000" // /* MW 5 */
+ 10476 "00000000" // /* MW 4 */
+ 10477 "11110000" // /* MW 3 */
+ 10478 "00101100" // /* MW 2 */
+ 10479 "00000000" // /* MW 1 */
+.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.return_address
+ 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10481 "10000000" // /* MW 3 */
+ 10482 "01110001" // /* MW 2 */
+ 10483 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4 first
+ 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10485 "00000000" // /* MW 3 */
+ 10486 "00101000" // /* MW 2 */
+ 10487 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 250 4
+.delay_slot
+ 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10489 "00000001" // /* MW 5 */
+ 10490 "00000000" // /* MW 4 */
+ 10491 "00000000" // /* MW 3 */
+ 10492 "11111000" // /* MW 2 */
+ 10493 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10495 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10497 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10499 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0
+ 10501 "00000000" // /* MW 1 */
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 205 first
+.src_ref 7 "superkernels.cpp" 210 6
+.function_start
+ 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10513 "10000000" // /* MW 5 */
+ 10514 "11001000" // /* MW 4 */
+ 10515 "11000110" // /* MW 3 */
+ 10516 "00000111" // /* MW 2 */
+ 10517 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6 first
+ 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10519 "11000001" // /* MW 5 */
+ 10520 "10110101" // /* MW 4 */
+ 10521 "11011000" // /* MW 3 */
+ 10522 "11000010" // /* MW 2 */
+ 10523 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 205
+ 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10525 "00000001" // /* MW 5 */
+ 10526 "00000000" // /* MW 4 */
+ 10527 "00000000" // /* MW 3 */
+ 10528 "00001000" // /* MW 2 */
+ 10529 "00000000" // /* MW 1 */
+ 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10531 "01110000" // /* MW 7 */
+ 10532 "11010000" // /* MW 6 */
+ 10533 "00001011" // /* MW 5 */
+ 10534 "00000000" // /* MW 4 */
+ 10535 "10110000" // /* MW 3 */
+ 10536 "01100011" // /* MW 2 */
+ 10537 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+ 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10539 "00010001" // /* MW 9 */
+ 10540 "00101000" // /* MW 8 */
+ 10541 "00110010" // /* MW 7 */
+ 10542 "11110011" // /* MW 6 */
+ 10543 "00000001" // /* MW 5 */
+ 10544 "00000000" // /* MW 4 */
+ 10545 "10110000" // /* MW 3 */
+ 10546 "10000010" // /* MW 2 */
+ 10547 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10549 "11000000" // /* MW 3 */
+ 10550 "11010100" // /* MW 2 */
+ 10551 "00011011" // /* MW 1 */
+ 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10553 "00000000" // /* MW 1 */
+ 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10555 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 210 6
+.src_ref 7 "superkernels.cpp" 210 16
+ 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */
+ 10557 "00000001" // /* MW 5 */
+ 10558 "01000000" // /* MW 4 */
+ 10559 "11110000" // /* MW 3 */
+ 10560 "00010100" // /* MW 2 */
+ 10561 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 22 first
+.delay_slot
+ 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10563 "10010000" // /* MW 3 */
+ 10564 "01100010" // /* MW 2 */
+ 10565 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 30
+.delay_slot
+ 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10567 "11111011" // /* MW 3 */
+ 10568 "01100011" // /* MW 2 */
+ 10569 "00010100" // /* MW 1 */
+.delay_slot
+ 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10571 "00111101" // /* MW 3 */
+ 10572 "11110100" // /* MW 2 */
+ 10573 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 207 11
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10575 "01110000" // /* MW 7 */
+ 10576 "01100000" // /* MW 6 */
+ 10577 "00110000" // /* MW 5 */
+ 10578 "00000011" // /* MW 4 */
+ 10579 "00110000" // /* MW 3 */
+ 10580 "11000110" // /* MW 2 */
+ 10581 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4
+.src_ref 7 "superkernels.cpp" 224 2
+.delay_slot
+ 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10583 "00000000" // /* MW 5 */
+ 10584 "11001010" // /* MW 4 */
+ 10585 "11000000" // /* MW 3 */
+ 10586 "00000111" // /* MW 2 */
+ 10587 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10589 "11010000" // /* MW 5 */
+ 10590 "11001000" // /* MW 4 */
+ 10591 "11000100" // /* MW 3 */
+ 10592 "00000111" // /* MW 2 */
+ 10593 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10595 "00010000" // /* MW 9 */
+ 10596 "00110010" // /* MW 8 */
+ 10597 "00110010" // /* MW 7 */
+ 10598 "11110001" // /* MW 6 */
+ 10599 "00000001" // /* MW 5 */
+ 10600 "00000000" // /* MW 4 */
+ 10601 "11100000" // /* MW 3 */
+ 10602 "11000000" // /* MW 2 */
+ 10603 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10605 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 213 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */
+ 10607 "00000001" // /* MW 5 */
+ 10608 "00000000" // /* MW 4 */
+ 10609 "00000000" // /* MW 3 */
+ 10610 "00010011" // /* MW 2 */
+ 10611 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10613 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10615 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10617 "00110001" // /* MW 3 */
+ 10618 "00100000" // /* MW 2 */
+ 10619 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10621 "00000101" // /* MW 3 */
+ 10622 "00100000" // /* MW 2 */
+ 10623 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10625 "00000000" // /* MW 15 */
+ 10626 "00000000" // /* MW 14 */
+ 10627 "01111000" // /* MW 13 */
+ 10628 "10100101" // /* MW 12 */
+ 10629 "00000001" // /* MW 11 */
+ 10630 "00000000" // /* MW 10 */
+ 10631 "00000000" // /* MW 9 */
+ 10632 "10000000" // /* MW 8 */
+ 10633 "00010001" // /* MW 7 */
+ 10634 "00000110" // /* MW 6 */
+ 10635 "00100010" // /* MW 5 */
+ 10636 "00000000" // /* MW 4 */
+ 10637 "11110000" // /* MW 3 */
+ 10638 "00101100" // /* MW 2 */
+ 10639 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18
+.return_address
+ 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10641 "10100000" // /* MW 5 */
+ 10642 "11001000" // /* MW 4 */
+ 10643 "11000100" // /* MW 3 */
+ 10644 "00000111" // /* MW 2 */
+ 10645 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 18 first
+.src_ref 7 "superkernels.cpp" 217 65
+ 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10647 "00010000" // /* MW 9 */
+ 10648 "10000000" // /* MW 8 */
+ 10649 "00110010" // /* MW 7 */
+ 10650 "11110001" // /* MW 6 */
+ 10651 "00000001" // /* MW 5 */
+ 10652 "00000000" // /* MW 4 */
+ 10653 "11010000" // /* MW 3 */
+ 10654 "11000010" // /* MW 2 */
+ 10655 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51
+.src_ref 7 "superkernels.cpp" 217 65
+.src_ref 7 "superkernels.cpp" 224 2
+ 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10657 "00010000" // /* MW 9 */
+ 10658 "10000000" // /* MW 8 */
+ 10659 "00110010" // /* MW 7 */
+ 10660 "11110001" // /* MW 6 */
+ 10661 "00000001" // /* MW 5 */
+ 10662 "00000000" // /* MW 4 */
+ 10663 "11010000" // /* MW 3 */
+ 10664 "11000110" // /* MW 2 */
+ 10665 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 51 first
+.src_ref 7 "superkernels.cpp" 217 16
+.src_ref 7 "superkernels.cpp" 222 47
+ 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10667 "00010000" // /* MW 9 */
+ 10668 "00101010" // /* MW 8 */
+ 10669 "10110010" // /* MW 7 */
+ 10670 "11110000" // /* MW 6 */
+ 10671 "00000001" // /* MW 5 */
+ 10672 "00000000" // /* MW 4 */
+ 10673 "01010000" // /* MW 3 */
+ 10674 "11001011" // /* MW 2 */
+ 10675 "01001010" // /* MW 1 */
+ 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10677 "00000000" // /* MW 1 */
+ 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10679 "00000000" // /* MW 1 */
+ 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */
+ 10681 "00000000" // /* MW 5 */
+ 10682 "00000000" // /* MW 4 */
+ 10683 "11111000" // /* MW 3 */
+ 10684 "00010100" // /* MW 2 */
+ 10685 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13
+.delay_slot
+ 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10687 "11000000" // /* MW 5 */
+ 10688 "11001000" // /* MW 4 */
+ 10689 "11000000" // /* MW 3 */
+ 10690 "00000111" // /* MW 2 */
+ 10691 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 27 first
+.delay_slot
+ 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10695 "00001111" // /* MW 3 */
+ 10696 "01100001" // /* MW 2 */
+ 10697 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 215 13 first
+.delay_slot
+ 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10699 "10100011" // /* MW 5 */
+ 10700 "00001100" // /* MW 4 */
+ 10701 "11110000" // /* MW 3 */
+ 10702 "00101100" // /* MW 2 */
+ 10703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 217 16 first
+.delay_slot
+ 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10705 "00000000" // /* MW 15 */
+ 10706 "00000000" // /* MW 14 */
+ 10707 "01111000" // /* MW 13 */
+ 10708 "10100101" // /* MW 12 */
+ 10709 "00000001" // /* MW 11 */
+ 10710 "00000000" // /* MW 10 */
+ 10711 "00000000" // /* MW 9 */
+ 10712 "10000000" // /* MW 8 */
+ 10713 "00010001" // /* MW 7 */
+ 10714 "00000110" // /* MW 6 */
+ 10715 "00100001" // /* MW 5 */
+ 10716 "00000000" // /* MW 4 */
+ 10717 "11110000" // /* MW 3 */
+ 10718 "00101100" // /* MW 2 */
+ 10719 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208
+.src_ref 7 "superkernels.cpp" 222 47
+.src_ref 7 "superkernels.cpp" 224 2
+ 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10721 "00000000" // /* MW 15 */
+ 10722 "00000000" // /* MW 14 */
+ 10723 "00010000" // /* MW 13 */
+ 10724 "00101010" // /* MW 12 */
+ 10725 "10110010" // /* MW 11 */
+ 10726 "11110000" // /* MW 10 */
+ 10727 "00000001" // /* MW 9 */
+ 10728 "00000000" // /* MW 8 */
+ 10729 "10001011" // /* MW 7 */
+ 10730 "10000000" // /* MW 6 */
+ 10731 "00100010" // /* MW 5 */
+ 10732 "00000000" // /* MW 4 */
+ 10733 "11110000" // /* MW 3 */
+ 10734 "00101100" // /* MW 2 */
+ 10735 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 10737 "00000000" // /* MW 7 */
+ 10738 "11000011" // /* MW 6 */
+ 10739 "10110011" // /* MW 5 */
+ 10740 "00000011" // /* MW 4 */
+ 10741 "01100000" // /* MW 3 */
+ 10742 "10010001" // /* MW 2 */
+ 10743 "01110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10745 "00010000" // /* MW 9 */
+ 10746 "00100000" // /* MW 8 */
+ 10747 "00110010" // /* MW 7 */
+ 10748 "11110000" // /* MW 6 */
+ 10749 "00000001" // /* MW 5 */
+ 10750 "00000000" // /* MW 4 */
+ 10751 "11010000" // /* MW 3 */
+ 10752 "11101110" // /* MW 2 */
+ 10753 "11111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10755 "00010110" // /* MW 3 */
+ 10756 "11111110" // /* MW 2 */
+ 10757 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10759 "00110110" // /* MW 3 */
+ 10760 "11111110" // /* MW 2 */
+ 10761 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10763 "01010110" // /* MW 3 */
+ 10764 "01000110" // /* MW 2 */
+ 10765 "00000111" // /* MW 1 */
+ 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10767 "00000000" // /* MW 1 */
+ 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10769 "00000000" // /* MW 1 */
+ 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10771 "00000000" // /* MW 1 */
+ 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10773 "00000000" // /* MW 1 */
+ 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10775 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10777 "00000010" // /* MW 3 */
+ 10778 "01100001" // /* MW 2 */
+ 10779 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10781 "00010001" // /* MW 3 */
+ 10782 "00000110" // /* MW 2 */
+ 10783 "00001111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10785 "11111101" // /* MW 3 */
+ 10786 "11100000" // /* MW 2 */
+ 10787 "00010111" // /* MW 1 */
+ 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10789 "00000000" // /* MW 1 */
+ 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10791 "00000000" // /* MW 1 */
+ 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10793 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10795 "00001000" // /* MW 3 */
+ 10796 "10010011" // /* MW 2 */
+ 10797 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+ 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10799 "10000001" // /* MW 5 */
+ 10800 "10101101" // /* MW 4 */
+ 10801 "10100111" // /* MW 3 */
+ 10802 "00000000" // /* MW 2 */
+ 10803 "00000100" // /* MW 1 */
+ 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10805 "00000000" // /* MW 1 */
+ 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10807 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+ 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10809 "00110110" // /* MW 3 */
+ 10810 "00000110" // /* MW 2 */
+ 10811 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 1 "io_buffer_main.h" 324 51
+ 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10813 "10000001" // /* MW 5 */
+ 10814 "11011101" // /* MW 4 */
+ 10815 "11011100" // /* MW 3 */
+ 10816 "11001010" // /* MW 2 */
+ 10817 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 47 first
+ 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10819 "01110110" // /* MW 3 */
+ 10820 "00000110" // /* MW 2 */
+ 10821 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10823 "10011110" // /* MW 3 */
+ 10824 "01011100" // /* MW 2 */
+ 10825 "00000111" // /* MW 1 */
+ 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10827 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 224 2 first
+.no_stack_arguments
+ 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */
+ 10829 "00000001" // /* MW 5 */
+ 10830 "00000000" // /* MW 4 */
+ 10831 "00111000" // /* MW 3 */
+ 10832 "00010100" // /* MW 2 */
+ 10833 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10835 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2 first
+.delay_slot
+ 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10837 "00000111" // /* MW 3 */
+ 10838 "01100010" // /* MW 2 */
+ 10839 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 221 2
+.delay_slot
+ 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10841 "00110001" // /* MW 3 */
+ 10842 "00000110" // /* MW 2 */
+ 10843 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45 first
+.delay_slot
+ 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10845 "00001101" // /* MW 3 */
+ 10846 "11100001" // /* MW 2 */
+ 10847 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 222 45
+.delay_slot
+ 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 10849 "00000000" // /* MW 15 */
+ 10850 "00000000" // /* MW 14 */
+ 10851 "10101000" // /* MW 13 */
+ 10852 "10100000" // /* MW 12 */
+ 10853 "00110100" // /* MW 11 */
+ 10854 "00000000" // /* MW 10 */
+ 10855 "00000000" // /* MW 9 */
+ 10856 "00000000" // /* MW 8 */
+ 10857 "01011011" // /* MW 7 */
+ 10858 "00000001" // /* MW 6 */
+ 10859 "00100000" // /* MW 5 */
+ 10860 "00000000" // /* MW 4 */
+ 10861 "11110000" // /* MW 3 */
+ 10862 "00101100" // /* MW 2 */
+ 10863 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+.src_ref 7 "superkernels.cpp" 227 14
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.return_address
+ 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10865 "00010000" // /* MW 9 */
+ 10866 "00100000" // /* MW 8 */
+ 10867 "00110010" // /* MW 7 */
+ 10868 "11110011" // /* MW 6 */
+ 10869 "00000001" // /* MW 5 */
+ 10870 "00000000" // /* MW 4 */
+ 10871 "11010000" // /* MW 3 */
+ 10872 "11000110" // /* MW 2 */
+ 10873 "11001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10875 "00000101" // /* MW 3 */
+ 10876 "00100000" // /* MW 2 */
+ 10877 "00010000" // /* MW 1 */
+ 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10879 "00000000" // /* MW 1 */
+ 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10881 "00000000" // /* MW 1 */
+ 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10883 "00000000" // /* MW 1 */
+ 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10885 "00000000" // /* MW 1 */
+ 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10887 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10889 "00001000" // /* MW 3 */
+ 10890 "01010001" // /* MW 2 */
+ 10891 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10893 "00010000" // /* MW 9 */
+ 10894 "00110000" // /* MW 8 */
+ 10895 "00110010" // /* MW 7 */
+ 10896 "11110001" // /* MW 6 */
+ 10897 "00000001" // /* MW 5 */
+ 10898 "00000000" // /* MW 4 */
+ 10899 "11010000" // /* MW 3 */
+ 10900 "11001110" // /* MW 2 */
+ 10901 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6 first
+ 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10903 "00110110" // /* MW 3 */
+ 10904 "00000110" // /* MW 2 */
+ 10905 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 19
+ 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10907 "01010110" // /* MW 3 */
+ 10908 "00000110" // /* MW 2 */
+ 10909 "00000010" // /* MW 1 */
+ 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10911 "00000000" // /* MW 1 */
+ 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10913 "00000000" // /* MW 1 */
+ 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10915 "00000000" // /* MW 1 */
+ 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10917 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10919 "00110001" // /* MW 3 */
+ 10920 "00100001" // /* MW 2 */
+ 10921 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10923 "00010001" // /* MW 3 */
+ 10924 "11100110" // /* MW 2 */
+ 10925 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 16 first
+ 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10927 "00101000" // /* MW 3 */
+ 10928 "01100001" // /* MW 2 */
+ 10929 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 226 6
+ 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */
+ 10931 "00000001" // /* MW 5 */
+ 10932 "01000000" // /* MW 4 */
+ 10933 "01101000" // /* MW 3 */
+ 10934 "00010101" // /* MW 2 */
+ 10935 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10937 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10939 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10941 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10943 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10945 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14
+ 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10947 "00000001" // /* MW 3 */
+ 10948 "00100000" // /* MW 2 */
+ 10949 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 227 14 first
+ 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 10951 "00000000" // /* MW 9 */
+ 10952 "00000000" // /* MW 8 */
+ 10953 "00000000" // /* MW 7 */
+ 10954 "10000000" // /* MW 6 */
+ 10955 "00010001" // /* MW 5 */
+ 10956 "00000110" // /* MW 4 */
+ 10957 "11110110" // /* MW 3 */
+ 10958 "00101100" // /* MW 2 */
+ 10959 "00000000" // /* MW 1 */
+.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 229
+ 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10961 "00111001" // /* MW 3 */
+ 10962 "11110100" // /* MW 2 */
+ 10963 "00000111" // /* MW 1 */
+ 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10965 "00011001" // /* MW 3 */
+ 10966 "11111011" // /* MW 2 */
+ 10967 "00000111" // /* MW 1 */
+ 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10969 "00000000" // /* MW 1 */
+ 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10971 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10973 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10975 "11110001" // /* MW 3 */
+ 10976 "11111101" // /* MW 2 */
+ 10977 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10979 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 10981 "00000000" // /* MW 3 */
+ 10982 "00101000" // /* MW 2 */
+ 10983 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 10985 "10100000" // /* MW 3 */
+ 10986 "01100111" // /* MW 2 */
+ 10987 "00011111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 229
+.delay_slot
+ 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 10989 "00000001" // /* MW 5 */
+ 10990 "00000000" // /* MW 4 */
+ 10991 "00000000" // /* MW 3 */
+ 10992 "11111000" // /* MW 2 */
+ 10993 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10995 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 10997 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 10999 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 205 first
+.src_ref 3 "elementwise_binary_shared.h" 211 24 first
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.function_start
+ 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11009 "01011000" // /* MW 9 */
+ 11010 "00000000" // /* MW 8 */
+ 11011 "00001000" // /* MW 7 */
+ 11012 "00001011" // /* MW 6 */
+ 11013 "00100000" // /* MW 5 */
+ 11014 "00001000" // /* MW 4 */
+ 11015 "11010000" // /* MW 3 */
+ 11016 "10000101" // /* MW 2 */
+ 11017 "00100011" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+ 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11019 "00000001" // /* MW 3 */
+ 11020 "10000000" // /* MW 2 */
+ 11021 "00010111" // /* MW 1 */
+ 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11023 "00000000" // /* MW 1 */
+ 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11025 "00000000" // /* MW 1 */
+ 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11027 "00000000" // /* MW 1 */
+ 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11029 "00000000" // /* MW 1 */
+ 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11031 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 211 22 first
+ 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11033 "00101001" // /* MW 3 */
+ 11034 "00011100" // /* MW 2 */
+ 11035 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 24 first
+ 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11037 "00101110" // /* MW 3 */
+ 11038 "00011100" // /* MW 2 */
+ 11039 "00000001" // /* MW 1 */
+ 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11041 "00000000" // /* MW 1 */
+ 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11043 "00000000" // /* MW 1 */
+ 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11045 "00000000" // /* MW 1 */
+ 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11047 "00000000" // /* MW 1 */
+ 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11049 "00000000" // /* MW 1 */
+ 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11051 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 212 22
+ 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11053 "00101001" // /* MW 3 */
+ 11054 "00011100" // /* MW 2 */
+ 11055 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 24 first
+ 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11057 "00101110" // /* MW 3 */
+ 11058 "00000100" // /* MW 2 */
+ 11059 "00000001" // /* MW 1 */
+ 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11061 "00000000" // /* MW 1 */
+ 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11063 "00000000" // /* MW 1 */
+ 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11065 "00000000" // /* MW 1 */
+ 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11067 "00000000" // /* MW 1 */
+ 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11069 "00000000" // /* MW 1 */
+ 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11071 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 213 22
+ 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11073 "00101001" // /* MW 3 */
+ 11074 "00011100" // /* MW 2 */
+ 11075 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 24 first
+ 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11077 "01110110" // /* MW 3 */
+ 11078 "00010100" // /* MW 2 */
+ 11079 "00000001" // /* MW 1 */
+ 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11081 "00000000" // /* MW 1 */
+ 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11083 "00000000" // /* MW 1 */
+ 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11085 "00000000" // /* MW 1 */
+ 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11087 "00000000" // /* MW 1 */
+ 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11089 "00000000" // /* MW 1 */
+ 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11091 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 214 22
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11093 "01110001" // /* MW 3 */
+ 11094 "01001100" // /* MW 2 */
+ 11095 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 34 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11097 "00010111" // /* MW 3 */
+ 11098 "00000100" // /* MW 2 */
+ 11099 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 217 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11101 "00000000" // /* MW 3 */
+ 11102 "00101000" // /* MW 2 */
+ 11103 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11105 "00000000" // /* MW 5 */
+ 11106 "10111110" // /* MW 4 */
+ 11107 "11110000" // /* MW 3 */
+ 11108 "00000000" // /* MW 2 */
+ 11109 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11111 "00010100" // /* MW 3 */
+ 11112 "11000010" // /* MW 2 */
+ 11113 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 48
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11115 "00100111" // /* MW 3 */
+ 11116 "01110110" // /* MW 2 */
+ 11117 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 216 36
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11119 "10000010" // /* MW 3 */
+ 11120 "00000001" // /* MW 2 */
+ 11121 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11123 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv
+.src_ref 3 "elementwise_binary_shared.h" 219
+.src_ref 3 "elementwise_binary_shared.h" 219 first
+.function_start
+ 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11137 "00000001" // /* MW 5 */
+ 11138 "00000000" // /* MW 4 */
+ 11139 "00000000" // /* MW 3 */
+ 11140 "00001000" // /* MW 2 */
+ 11141 "00000000" // /* MW 1 */
+ 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11143 "00111101" // /* MW 3 */
+ 11144 "11111000" // /* MW 2 */
+ 11145 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8 first
+.no_stack_arguments
+ 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */
+ 11147 "00000001" // /* MW 5 */
+ 11148 "00000000" // /* MW 4 */
+ 11149 "10000000" // /* MW 3 */
+ 11150 "00010101" // /* MW 2 */
+ 11151 "00000000" // /* MW 1 */
+.delay_slot
+ 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11153 "10100000" // /* MW 3 */
+ 11154 "00010111" // /* MW 2 */
+ 11155 "00011000" // /* MW 1 */
+.delay_slot
+ 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11157 "00010101" // /* MW 3 */
+ 11158 "11111100" // /* MW 2 */
+ 11159 "00001111" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.delay_slot
+ 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11161 "11000000" // /* MW 3 */
+ 11162 "11010000" // /* MW 2 */
+ 11163 "00011011" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11165 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11167 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 220 8
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.src_ref 8 "add_impl.h" 146 29
+.return_address
+ 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11169 "00001000" // /* MW 9 */
+ 11170 "11000100" // /* MW 8 */
+ 11171 "00110011" // /* MW 7 */
+ 11172 "01101000" // /* MW 6 */
+ 11173 "00000000" // /* MW 5 */
+ 11174 "00000001" // /* MW 4 */
+ 11175 "00100000" // /* MW 3 */
+ 11176 "00000111" // /* MW 2 */
+ 11177 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29
+.src_ref 8 "add_impl.h" 147 37
+.src_ref 8 "add_impl.h" 147 39
+ 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11179 "01011000" // /* MW 9 */
+ 11180 "11111101" // /* MW 8 */
+ 11181 "00000111" // /* MW 7 */
+ 11182 "00001000" // /* MW 6 */
+ 11183 "10000000" // /* MW 5 */
+ 11184 "00000001" // /* MW 4 */
+ 11185 "10000000" // /* MW 3 */
+ 11186 "11100010" // /* MW 2 */
+ 11187 "00000001" // /* MW 1 */
+.src_ref 8 "add_impl.h" 146 29 first
+.src_ref 8 "add_impl.h" 147 39
+ 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11189 "00000001" // /* MW 9 */
+ 11190 "10100000" // /* MW 8 */
+ 11191 "00000111" // /* MW 7 */
+ 11192 "10000000" // /* MW 6 */
+ 11193 "00010001" // /* MW 5 */
+ 11194 "00001010" // /* MW 4 */
+ 11195 "00100000" // /* MW 3 */
+ 11196 "10111110" // /* MW 2 */
+ 11197 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 50 first
+ 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11199 "01001010" // /* MW 3 */
+ 11200 "00000110" // /* MW 2 */
+ 11201 "00000000" // /* MW 1 */
+ 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11203 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11205 "00000000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 37
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11207 "00010111" // /* MW 3 */
+ 11208 "00000010" // /* MW 2 */
+ 11209 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11211 "00000000" // /* MW 3 */
+ 11212 "00101000" // /* MW 2 */
+ 11213 "00010000" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11215 "00000101" // /* MW 3 */
+ 11216 "00100010" // /* MW 2 */
+ 11217 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary_shared.h" 222 4
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11219 "00000001" // /* MW 5 */
+ 11220 "00000000" // /* MW 4 */
+ 11221 "00000000" // /* MW 3 */
+ 11222 "11111000" // /* MW 2 */
+ 11223 "11111111" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 54 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11225 "00100111" // /* MW 3 */
+ 11226 "01110111" // /* MW 2 */
+ 11227 "00010100" // /* MW 1 */
+.src_ref 8 "add_impl.h" 147 39
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11229 "10000010" // /* MW 3 */
+ 11230 "00100001" // /* MW 2 */
+ 11231 "00010100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0
+ 11233 "00000000" // /* MW 1 */
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E
+.src_ref 3 "elementwise_binary_shared.h" 227 first
+.src_ref 3 "elementwise_binary_shared.h" 232 8 first
+.tail_call
+.function_start
+ 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */
+ 11249 "00000000" // /* MW 5 */
+ 11250 "00000000" // /* MW 4 */
+ 11251 "00101000" // /* MW 3 */
+ 11252 "00010011" // /* MW 2 */
+ 11253 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11255 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11257 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11259 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11261 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end
+.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0
+ 11263 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 141 first
+.src_ref 3 "elementwise_binary.h" 142 23
+.src_ref 3 "elementwise_binary.h" 144 4 first
+.function_start
+ 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11265 "00000001" // /* MW 5 */
+ 11266 "00100001" // /* MW 4 */
+ 11267 "00000000" // /* MW 3 */
+ 11268 "00000000" // /* MW 2 */
+ 11269 "00000101" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11271 "11000000" // /* MW 3 */
+ 11272 "01010000" // /* MW 2 */
+ 11273 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 141
+.delay_slot
+ 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11275 "10010000" // /* MW 3 */
+ 11276 "01100000" // /* MW 2 */
+ 11277 "00011000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23 first
+.delay_slot
+ 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11279 "00010001" // /* MW 3 */
+ 11280 "00000100" // /* MW 2 */
+ 11281 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 142 23
+.delay_slot
+ 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11283 "00010001" // /* MW 3 */
+ 11284 "00010100" // /* MW 2 */
+ 11285 "00001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0
+ 11287 "00000000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv
+.src_ref 3 "elementwise_binary.h" 130 first
+.src_ref 3 "elementwise_binary.h" 133 24 first
+.function_start
+ 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11297 "00101110" // /* MW 3 */
+ 11298 "00011100" // /* MW 2 */
+ 11299 "00000001" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 130
+ 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11301 "00000001" // /* MW 5 */
+ 11302 "00000000" // /* MW 4 */
+ 11303 "00000000" // /* MW 3 */
+ 11304 "00001000" // /* MW 2 */
+ 11305 "00000000" // /* MW 1 */
+ 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11307 "00111101" // /* MW 3 */
+ 11308 "11111100" // /* MW 2 */
+ 11309 "00001111" // /* MW 1 */
+ 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11311 "00000000" // /* MW 1 */
+ 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11313 "00000000" // /* MW 1 */
+ 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11315 "00000000" // /* MW 1 */
+ 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11317 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 133 22 first
+ 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11319 "00101001" // /* MW 3 */
+ 11320 "00011100" // /* MW 2 */
+ 11321 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 24 first
+ 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11323 "00101110" // /* MW 3 */
+ 11324 "00011100" // /* MW 2 */
+ 11325 "00000001" // /* MW 1 */
+ 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11327 "00000000" // /* MW 1 */
+ 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11329 "00000000" // /* MW 1 */
+ 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11331 "00000000" // /* MW 1 */
+ 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11333 "00000000" // /* MW 1 */
+ 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11335 "00000000" // /* MW 1 */
+ 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11337 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 134 22
+ 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11339 "00101001" // /* MW 3 */
+ 11340 "00011100" // /* MW 2 */
+ 11341 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 24 first
+ 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11343 "00101110" // /* MW 3 */
+ 11344 "00000100" // /* MW 2 */
+ 11345 "00000001" // /* MW 1 */
+ 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11347 "00000000" // /* MW 1 */
+ 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11349 "00000000" // /* MW 1 */
+ 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11351 "00000000" // /* MW 1 */
+ 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11353 "00000000" // /* MW 1 */
+ 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11355 "00000000" // /* MW 1 */
+ 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11357 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 135 22
+ 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11359 "00101001" // /* MW 3 */
+ 11360 "00011100" // /* MW 2 */
+ 11361 "00001000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 24 first
+ 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11363 "00101110" // /* MW 3 */
+ 11364 "00010100" // /* MW 2 */
+ 11365 "00000001" // /* MW 1 */
+ 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11367 "00000000" // /* MW 1 */
+ 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11369 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 137 8 first
+.no_stack_arguments
+ 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */
+ 11371 "00000001" // /* MW 5 */
+ 11372 "00000000" // /* MW 4 */
+ 11373 "00000000" // /* MW 3 */
+ 11374 "00010110" // /* MW 2 */
+ 11375 "00000000" // /* MW 1 */
+.delay_slot
+ 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11377 "10011101" // /* MW 3 */
+ 11378 "11111011" // /* MW 2 */
+ 11379 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11381 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11383 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 136 22 first
+.delay_slot
+ 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11385 "00101001" // /* MW 3 */
+ 11386 "11011100" // /* MW 2 */
+ 11387 "00001000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+ 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11389 "11000000" // /* MW 3 */
+ 11390 "01100000" // /* MW 2 */
+ 11391 "00011111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.return_address
+ 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11393 "00111001" // /* MW 3 */
+ 11394 "11111100" // /* MW 2 */
+ 11395 "00000111" // /* MW 1 */
+ 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11397 "00000000" // /* MW 1 */
+ 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11399 "00000000" // /* MW 1 */
+ 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11401 "00000000" // /* MW 1 */
+ 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11403 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11405 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11407 "10011001" // /* MW 3 */
+ 11408 "11111011" // /* MW 2 */
+ 11409 "00000111" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11411 "00000000" // /* MW 3 */
+ 11412 "00101000" // /* MW 2 */
+ 11413 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11415 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11417 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11419 "00000000" // /* MW 1 */
+.src_ref 8 "mul_impl.h" 134 25
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11421 "00000001" // /* MW 3 */
+ 11422 "00100000" // /* MW 2 */
+ 11423 "00010000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 139 4
+.src_ref 8 "mul_impl.h" 134 25 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11425 "01110001" // /* MW 9 */
+ 11426 "00000000" // /* MW 8 */
+ 11427 "00000000" // /* MW 7 */
+ 11428 "00000000" // /* MW 6 */
+ 11429 "11111110" // /* MW 5 */
+ 11430 "00111111" // /* MW 4 */
+ 11431 "00110000" // /* MW 3 */
+ 11432 "11000010" // /* MW 2 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0
+ 11433 "11101000" // /* MW 1 */
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E
+.src_ref 3 "elementwise_binary.h" 149 first
+.src_ref 3 "elementwise_binary.h" 156 37
+.src_ref 3 "elementwise_binary.h" 168 8 first
+.function_start
+ 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11441 "00010000" // /* MW 9 */
+ 11442 "10110000" // /* MW 8 */
+ 11443 "01111110" // /* MW 7 */
+ 11444 "00001000" // /* MW 6 */
+ 11445 "00000000" // /* MW 5 */
+ 11446 "00000000" // /* MW 4 */
+ 11447 "10000000" // /* MW 3 */
+ 11448 "00000000" // /* MW 2 */
+ 11449 "00000100" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 37 first
+.src_ref 3 "elementwise_binary.h" 168 8 first
+ 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11451 "00010000" // /* MW 9 */
+ 11452 "10111000" // /* MW 8 */
+ 11453 "10111110" // /* MW 7 */
+ 11454 "00001001" // /* MW 6 */
+ 11455 "00000000" // /* MW 5 */
+ 11456 "00000000" // /* MW 4 */
+ 11457 "11010000" // /* MW 3 */
+ 11458 "00001110" // /* MW 2 */
+ 11459 "01100001" // /* MW 1 */
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11461 "01011000" // /* MW 9 */
+ 11462 "00111100" // /* MW 8 */
+ 11463 "00001011" // /* MW 7 */
+ 11464 "01001000" // /* MW 6 */
+ 11465 "00010111" // /* MW 5 */
+ 11466 "00111110" // /* MW 4 */
+ 11467 "11010000" // /* MW 3 */
+ 11468 "10010000" // /* MW 2 */
+ 11469 "01100000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11471 "00010000" // /* MW 9 */
+ 11472 "00110100" // /* MW 8 */
+ 11473 "00110010" // /* MW 7 */
+ 11474 "11110010" // /* MW 6 */
+ 11475 "00000001" // /* MW 5 */
+ 11476 "00000000" // /* MW 4 */
+ 11477 "11010000" // /* MW 3 */
+ 11478 "10000000" // /* MW 2 */
+ 11479 "01100010" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11481 "01000010" // /* MW 3 */
+ 11482 "00000100" // /* MW 2 */
+ 11483 "00000100" // /* MW 1 */
+ 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11485 "00000000" // /* MW 1 */
+ 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11487 "00000000" // /* MW 1 */
+ 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11489 "00000000" // /* MW 1 */
+.src_ref 3 "elementwise_binary.h" 156 78
+ 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11491 "00011101" // /* MW 3 */
+ 11492 "11000010" // /* MW 2 */
+ 11493 "00010000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 168 8
+.src_ref 3 "elementwise_binary.h" 187 20 first
+ 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11495 "11111001" // /* MW 5 */
+ 11496 "11100001" // /* MW 4 */
+ 11497 "10001010" // /* MW 3 */
+ 11498 "00001110" // /* MW 2 */
+ 11499 "00000101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11501 "01101000" // /* MW 5 */
+ 11502 "01010000" // /* MW 4 */
+ 11503 "01110000" // /* MW 3 */
+ 11504 "00010011" // /* MW 2 */
+ 11505 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 195 20
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11507 "10000000" // /* MW 7 */
+ 11508 "10111010" // /* MW 6 */
+ 11509 "11101000" // /* MW 5 */
+ 11510 "01010000" // /* MW 4 */
+ 11511 "01110000" // /* MW 3 */
+ 11512 "00011011" // /* MW 2 */
+ 11513 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11515 "01101000" // /* MW 5 */
+ 11516 "01010000" // /* MW 4 */
+ 11517 "01110000" // /* MW 3 */
+ 11518 "00010011" // /* MW 2 */
+ 11519 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11521 "11101000" // /* MW 5 */
+ 11522 "01010000" // /* MW 4 */
+ 11523 "01110000" // /* MW 3 */
+ 11524 "00011011" // /* MW 2 */
+ 11525 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11527 "10011011" // /* MW 3 */
+ 11528 "00001000" // /* MW 2 */
+ 11529 "00000001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11531 "01101000" // /* MW 5 */
+ 11532 "01010000" // /* MW 4 */
+ 11533 "01110000" // /* MW 3 */
+ 11534 "00011011" // /* MW 2 */
+ 11535 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11537 "11101000" // /* MW 5 */
+ 11538 "01010000" // /* MW 4 */
+ 11539 "01110000" // /* MW 3 */
+ 11540 "00010011" // /* MW 2 */
+ 11541 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11543 "01000001" // /* MW 9 */
+ 11544 "11100010" // /* MW 8 */
+ 11545 "00000000" // /* MW 7 */
+ 11546 "00011101" // /* MW 6 */
+ 11547 "00110100" // /* MW 5 */
+ 11548 "00101000" // /* MW 4 */
+ 11549 "01110000" // /* MW 3 */
+ 11550 "00011011" // /* MW 2 */
+ 11551 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11553 "01100001" // /* MW 9 */
+ 11554 "11100000" // /* MW 8 */
+ 11555 "00000001" // /* MW 7 */
+ 11556 "00011101" // /* MW 6 */
+ 11557 "01110100" // /* MW 5 */
+ 11558 "00101000" // /* MW 4 */
+ 11559 "01110000" // /* MW 3 */
+ 11560 "00010011" // /* MW 2 */
+ 11561 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11563 "01000001" // /* MW 9 */
+ 11564 "11100010" // /* MW 8 */
+ 11565 "00000000" // /* MW 7 */
+ 11566 "00011101" // /* MW 6 */
+ 11567 "00110100" // /* MW 5 */
+ 11568 "00101000" // /* MW 4 */
+ 11569 "01110000" // /* MW 3 */
+ 11570 "00011011" // /* MW 2 */
+ 11571 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11573 "01100001" // /* MW 9 */
+ 11574 "11100000" // /* MW 8 */
+ 11575 "00000001" // /* MW 7 */
+ 11576 "00011101" // /* MW 6 */
+ 11577 "01110100" // /* MW 5 */
+ 11578 "00101000" // /* MW 4 */
+ 11579 "01110000" // /* MW 3 */
+ 11580 "00010011" // /* MW 2 */
+ 11581 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11583 "01000001" // /* MW 9 */
+ 11584 "11100010" // /* MW 8 */
+ 11585 "00000000" // /* MW 7 */
+ 11586 "00011101" // /* MW 6 */
+ 11587 "00110100" // /* MW 5 */
+ 11588 "00101000" // /* MW 4 */
+ 11589 "01110000" // /* MW 3 */
+ 11590 "00011011" // /* MW 2 */
+ 11591 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11593 "01100001" // /* MW 9 */
+ 11594 "11100000" // /* MW 8 */
+ 11595 "00000001" // /* MW 7 */
+ 11596 "00011101" // /* MW 6 */
+ 11597 "01110100" // /* MW 5 */
+ 11598 "00101000" // /* MW 4 */
+ 11599 "01110000" // /* MW 3 */
+ 11600 "00010011" // /* MW 2 */
+ 11601 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 11603 "01000001" // /* MW 13 */
+ 11604 "11100010" // /* MW 12 */
+ 11605 "00000000" // /* MW 11 */
+ 11606 "10001100" // /* MW 10 */
+ 11607 "01110000" // /* MW 9 */
+ 11608 "00001000" // /* MW 8 */
+ 11609 "00000000" // /* MW 7 */
+ 11610 "00000000" // /* MW 6 */
+ 11611 "01101000" // /* MW 5 */
+ 11612 "01010000" // /* MW 4 */
+ 11613 "01110000" // /* MW 3 */
+ 11614 "00011011" // /* MW 2 */
+ 11615 "00100001" // /* MW 1 */
+.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 187 20 first
+.src_ref 3 "elementwise_binary.h" 189 20 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11617 "00000011" // /* MW 15 */
+ 11618 "00001111" // /* MW 14 */
+ 11619 "01111000" // /* MW 13 */
+ 11620 "10100101" // /* MW 12 */
+ 11621 "00000001" // /* MW 11 */
+ 11622 "00000000" // /* MW 10 */
+ 11623 "00000000" // /* MW 9 */
+ 11624 "00000000" // /* MW 8 */
+ 11625 "10100011" // /* MW 7 */
+ 11626 "00011100" // /* MW 6 */
+ 11627 "11101010" // /* MW 5 */
+ 11628 "01010000" // /* MW 4 */
+ 11629 "01110000" // /* MW 3 */
+ 11630 "00010011" // /* MW 2 */
+ 11631 "00100001" // /* MW 1 */
+.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 211 20 first
+.src_ref 3 "elementwise_binary.h" 213 20 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.end_of_loop
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 11633 "00010010" // /* MW 15 */
+ 11634 "00000111" // /* MW 14 */
+ 11635 "01111000" // /* MW 13 */
+ 11636 "10100101" // /* MW 12 */
+ 11637 "00000001" // /* MW 11 */
+ 11638 "00000000" // /* MW 10 */
+ 11639 "00000000" // /* MW 9 */
+ 11640 "00000000" // /* MW 8 */
+ 11641 "00100011" // /* MW 7 */
+ 11642 "00011100" // /* MW 6 */
+ 11643 "01101010" // /* MW 5 */
+ 11644 "01010000" // /* MW 4 */
+ 11645 "01110000" // /* MW 3 */
+ 11646 "00011011" // /* MW 2 */
+ 11647 "00100001" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11649 "01100001" // /* MW 7 */
+ 11650 "11100000" // /* MW 6 */
+ 11651 "00000001" // /* MW 5 */
+ 11652 "00000010" // /* MW 4 */
+ 11653 "01100000" // /* MW 3 */
+ 11654 "10010100" // /* MW 2 */
+ 11655 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11657 "01000001" // /* MW 7 */
+ 11658 "11100010" // /* MW 6 */
+ 11659 "00000000" // /* MW 5 */
+ 11660 "00000010" // /* MW 4 */
+ 11661 "01100000" // /* MW 3 */
+ 11662 "10000100" // /* MW 2 */
+ 11663 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11665 "01100001" // /* MW 7 */
+ 11666 "11100000" // /* MW 6 */
+ 11667 "00000001" // /* MW 5 */
+ 11668 "00000010" // /* MW 4 */
+ 11669 "01100000" // /* MW 3 */
+ 11670 "10010100" // /* MW 2 */
+ 11671 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11673 "01000001" // /* MW 7 */
+ 11674 "11100010" // /* MW 6 */
+ 11675 "00000000" // /* MW 5 */
+ 11676 "00000010" // /* MW 4 */
+ 11677 "01100000" // /* MW 3 */
+ 11678 "10000100" // /* MW 2 */
+ 11679 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11681 "01100001" // /* MW 7 */
+ 11682 "11100000" // /* MW 6 */
+ 11683 "00000001" // /* MW 5 */
+ 11684 "00000010" // /* MW 4 */
+ 11685 "01100000" // /* MW 3 */
+ 11686 "10010100" // /* MW 2 */
+ 11687 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 5 "mul_acc32_fp.hpp" 36 105 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11689 "01000001" // /* MW 7 */
+ 11690 "11100010" // /* MW 6 */
+ 11691 "00000000" // /* MW 5 */
+ 11692 "00000010" // /* MW 4 */
+ 11693 "01100000" // /* MW 3 */
+ 11694 "10000100" // /* MW 2 */
+ 11695 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "mul_acc32_fp.hpp" 36 105
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11697 "01100001" // /* MW 7 */
+ 11698 "11100000" // /* MW 6 */
+ 11699 "00000001" // /* MW 5 */
+ 11700 "00000010" // /* MW 4 */
+ 11701 "01100000" // /* MW 3 */
+ 11702 "10010100" // /* MW 2 */
+ 11703 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11705 "00100011" // /* MW 3 */
+ 11706 "00011100" // /* MW 2 */
+ 11707 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 172 4 first
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 11709 "00000000" // /* MW 5 */
+ 11710 "01010000" // /* MW 4 */
+ 11711 "01100000" // /* MW 3 */
+ 11712 "10010100" // /* MW 2 */
+ 11713 "01000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11715 "00100011" // /* MW 3 */
+ 11716 "00011100" // /* MW 2 */
+ 11717 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11719 "10100011" // /* MW 3 */
+ 11720 "00011100" // /* MW 2 */
+ 11721 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 5 "accum.hpp" 1110 102 first
+.src_ref 3 "elementwise_binary.h" 218 20 first
+.delay_slot
+ 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11723 "00100011" // /* MW 3 */
+ 11724 "00011100" // /* MW 2 */
+ 11725 "00001010" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 3 "elementwise_binary.h" 195 20 first
+.delay_slot
+ 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11727 "10100011" // /* MW 3 */
+ 11728 "00011100" // /* MW 2 */
+ 11729 "00001010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end
+.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0
+ 11731 "00000000" // /* MW 1 */
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE
+.src_ref 7 "superkernels.cpp" 369 first
+.src_ref 7 "superkernels.cpp" 374 6
+.function_start
+ 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11745 "10000000" // /* MW 5 */
+ 11746 "11001000" // /* MW 4 */
+ 11747 "11001000" // /* MW 3 */
+ 11748 "00000111" // /* MW 2 */
+ 11749 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+ 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11751 "11000001" // /* MW 5 */
+ 11752 "10110101" // /* MW 4 */
+ 11753 "11011000" // /* MW 3 */
+ 11754 "11000010" // /* MW 2 */
+ 11755 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 369
+ 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11757 "00000001" // /* MW 5 */
+ 11758 "00000000" // /* MW 4 */
+ 11759 "00000000" // /* MW 3 */
+ 11760 "00001000" // /* MW 2 */
+ 11761 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 22 first
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11763 "01111001" // /* MW 9 */
+ 11764 "01100000" // /* MW 8 */
+ 11765 "11001010" // /* MW 7 */
+ 11766 "10000001" // /* MW 6 */
+ 11767 "00010100" // /* MW 5 */
+ 11768 "00100011" // /* MW 4 */
+ 11769 "10110000" // /* MW 3 */
+ 11770 "00111010" // /* MW 2 */
+ 11771 "11111111" // /* MW 1 */
+ 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11773 "01110000" // /* MW 7 */
+ 11774 "11010000" // /* MW 6 */
+ 11775 "00001011" // /* MW 5 */
+ 11776 "00000000" // /* MW 4 */
+ 11777 "10110000" // /* MW 3 */
+ 11778 "10000011" // /* MW 2 */
+ 11779 "11111101" // /* MW 1 */
+ 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11781 "00010101" // /* MW 3 */
+ 11782 "11111100" // /* MW 2 */
+ 11783 "00001111" // /* MW 1 */
+ 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11785 "00111101" // /* MW 3 */
+ 11786 "11110000" // /* MW 2 */
+ 11787 "00001111" // /* MW 1 */
+ 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11789 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 374 6 first
+.src_ref 7 "superkernels.cpp" 374 16 first
+ 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */
+ 11791 "00000001" // /* MW 5 */
+ 11792 "01000000" // /* MW 4 */
+ 11793 "01010000" // /* MW 3 */
+ 11794 "00010111" // /* MW 2 */
+ 11795 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 30 first
+.delay_slot
+ 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11797 "11111011" // /* MW 3 */
+ 11798 "01100011" // /* MW 2 */
+ 11799 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11801 "10100000" // /* MW 5 */
+ 11802 "11001000" // /* MW 4 */
+ 11803 "11000100" // /* MW 3 */
+ 11804 "00000111" // /* MW 2 */
+ 11805 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 371 11
+.delay_slot
+ 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 11807 "01110000" // /* MW 7 */
+ 11808 "01100000" // /* MW 6 */
+ 11809 "00110111" // /* MW 5 */
+ 11810 "00000001" // /* MW 4 */
+ 11811 "00110000" // /* MW 3 */
+ 11812 "11000110" // /* MW 2 */
+ 11813 "01000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11815 "11000000" // /* MW 3 */
+ 11816 "11010110" // /* MW 2 */
+ 11817 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 7 "superkernels.cpp" 379 28
+.src_ref 7 "superkernels.cpp" 381 42
+.src_ref 7 "superkernels.cpp" 393 2
+.delay_slot
+ 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11819 "00010001" // /* MW 9 */
+ 11820 "11000000" // /* MW 8 */
+ 11821 "10110010" // /* MW 7 */
+ 11822 "11110011" // /* MW 6 */
+ 11823 "00000001" // /* MW 5 */
+ 11824 "00000000" // /* MW 4 */
+ 11825 "10110000" // /* MW 3 */
+ 11826 "10100011" // /* MW 2 */
+ 11827 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11829 "00010001" // /* MW 9 */
+ 11830 "00110100" // /* MW 8 */
+ 11831 "00110010" // /* MW 7 */
+ 11832 "11110001" // /* MW 6 */
+ 11833 "00000001" // /* MW 5 */
+ 11834 "00000000" // /* MW 4 */
+ 11835 "01100000" // /* MW 3 */
+ 11836 "10010001" // /* MW 2 */
+ 11837 "00010011" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11839 "00010000" // /* MW 9 */
+ 11840 "00110010" // /* MW 8 */
+ 11841 "00110010" // /* MW 7 */
+ 11842 "11110001" // /* MW 6 */
+ 11843 "00000001" // /* MW 5 */
+ 11844 "00000000" // /* MW 4 */
+ 11845 "11100000" // /* MW 3 */
+ 11846 "11000000" // /* MW 2 */
+ 11847 "01000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11849 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 377 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 11851 "00000001" // /* MW 5 */
+ 11852 "00000000" // /* MW 4 */
+ 11853 "00010000" // /* MW 3 */
+ 11854 "00010110" // /* MW 2 */
+ 11855 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11859 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11861 "00110001" // /* MW 3 */
+ 11862 "00100000" // /* MW 2 */
+ 11863 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11865 "00000101" // /* MW 3 */
+ 11866 "00100000" // /* MW 2 */
+ 11867 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11869 "00010001" // /* MW 3 */
+ 11870 "00000110" // /* MW 2 */
+ 11871 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 381 42 first
+.return_address
+ 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11873 "00010000" // /* MW 9 */
+ 11874 "00101000" // /* MW 8 */
+ 11875 "10110010" // /* MW 7 */
+ 11876 "11110000" // /* MW 6 */
+ 11877 "00000001" // /* MW 5 */
+ 11878 "00000000" // /* MW 4 */
+ 11879 "11010000" // /* MW 3 */
+ 11880 "11000010" // /* MW 2 */
+ 11881 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16
+.src_ref 7 "superkernels.cpp" 381 18
+.src_ref 7 "superkernels.cpp" 390 48
+ 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11883 "00010000" // /* MW 9 */
+ 11884 "00101010" // /* MW 8 */
+ 11885 "10110010" // /* MW 7 */
+ 11886 "11110001" // /* MW 6 */
+ 11887 "00000001" // /* MW 5 */
+ 11888 "00000000" // /* MW 4 */
+ 11889 "11010000" // /* MW 3 */
+ 11890 "11000110" // /* MW 2 */
+ 11891 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 28 first
+.src_ref 7 "superkernels.cpp" 382 16
+.src_ref 7 "superkernels.cpp" 391 48
+ 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11893 "00010000" // /* MW 9 */
+ 11894 "00101110" // /* MW 8 */
+ 11895 "10110010" // /* MW 7 */
+ 11896 "11110000" // /* MW 6 */
+ 11897 "00000001" // /* MW 5 */
+ 11898 "00000000" // /* MW 4 */
+ 11899 "01010000" // /* MW 3 */
+ 11900 "11001011" // /* MW 2 */
+ 11901 "11101010" // /* MW 1 */
+ 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11903 "00000000" // /* MW 1 */
+ 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11905 "00000000" // /* MW 1 */
+ 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11907 "00000000" // /* MW 1 */
+ 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */
+ 11909 "00000000" // /* MW 5 */
+ 11910 "00000000" // /* MW 4 */
+ 11911 "01011000" // /* MW 3 */
+ 11912 "00010111" // /* MW 2 */
+ 11913 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13
+.delay_slot
+ 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11915 "11000000" // /* MW 5 */
+ 11916 "11001000" // /* MW 4 */
+ 11917 "11000100" // /* MW 3 */
+ 11918 "00000111" // /* MW 2 */
+ 11919 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 27 first
+.delay_slot
+ 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11921 "00001111" // /* MW 3 */
+ 11922 "01100001" // /* MW 2 */
+ 11923 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 379 13 first
+.delay_slot
+ 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11925 "01010001" // /* MW 3 */
+ 11926 "00000110" // /* MW 2 */
+ 11927 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 381 16 first
+.delay_slot
+ 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11929 "00010001" // /* MW 3 */
+ 11930 "00000110" // /* MW 2 */
+ 11931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 382 16 first
+.delay_slot
+ 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11933 "00010001" // /* MW 3 */
+ 11934 "00000110" // /* MW 2 */
+ 11935 "00001001" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192
+.src_ref 7 "superkernels.cpp" 390 48
+ 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11937 "10101000" // /* MW 5 */
+ 11938 "11001000" // /* MW 4 */
+ 11939 "11000110" // /* MW 3 */
+ 11940 "00000111" // /* MW 2 */
+ 11941 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48
+ 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11943 "00010000" // /* MW 9 */
+ 11944 "00101110" // /* MW 8 */
+ 11945 "10110010" // /* MW 7 */
+ 11946 "11110000" // /* MW 6 */
+ 11947 "00000001" // /* MW 5 */
+ 11948 "00000000" // /* MW 4 */
+ 11949 "11110000" // /* MW 3 */
+ 11950 "00101100" // /* MW 2 */
+ 11951 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11953 "10000110" // /* MW 3 */
+ 11954 "01100111" // /* MW 2 */
+ 11955 "00011000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+.src_ref 1 "io_buffer_main.h" 218 49
+ 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 11957 "00010000" // /* MW 9 */
+ 11958 "00100000" // /* MW 8 */
+ 11959 "00110010" // /* MW 7 */
+ 11960 "11110001" // /* MW 6 */
+ 11961 "00000001" // /* MW 5 */
+ 11962 "00000000" // /* MW 4 */
+ 11963 "11010000" // /* MW 3 */
+ 11964 "11101110" // /* MW 2 */
+ 11965 "00011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11967 "00010110" // /* MW 3 */
+ 11968 "11111110" // /* MW 2 */
+ 11969 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11971 "00110110" // /* MW 3 */
+ 11972 "11111110" // /* MW 2 */
+ 11973 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+ 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11975 "01010110" // /* MW 3 */
+ 11976 "00000110" // /* MW 2 */
+ 11977 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11979 "01110110" // /* MW 3 */
+ 11980 "01000110" // /* MW 2 */
+ 11981 "00000000" // /* MW 1 */
+ 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11983 "00000000" // /* MW 1 */
+ 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11985 "00000000" // /* MW 1 */
+ 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11987 "00000000" // /* MW 1 */
+ 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 11989 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 11991 "00000010" // /* MW 3 */
+ 11992 "01100001" // /* MW 2 */
+ 11993 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2 first
+.src_ref 1 "io_buffer_main.h" 218 20
+ 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 11995 "00001110" // /* MW 5 */
+ 11996 "01000000" // /* MW 4 */
+ 11997 "00111001" // /* MW 3 */
+ 11998 "11000010" // /* MW 2 */
+ 11999 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 385 2
+ 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12001 "00010001" // /* MW 3 */
+ 12002 "00000110" // /* MW 2 */
+ 12003 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+.src_ref 1 "io_buffer_main.h" 395 8
+ 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12005 "11111101" // /* MW 3 */
+ 12006 "11100000" // /* MW 2 */
+ 12007 "00010111" // /* MW 1 */
+ 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12009 "00000000" // /* MW 1 */
+ 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12011 "00000000" // /* MW 1 */
+ 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12013 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12015 "00001000" // /* MW 3 */
+ 12016 "11010011" // /* MW 2 */
+ 12017 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12019 "00000110" // /* MW 3 */
+ 12020 "01100111" // /* MW 2 */
+ 12021 "00011010" // /* MW 1 */
+ 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12023 "00000000" // /* MW 1 */
+ 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12025 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12027 "01110110" // /* MW 3 */
+ 12028 "11111111" // /* MW 2 */
+ 12029 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12031 "00110110" // /* MW 3 */
+ 12032 "11111110" // /* MW 2 */
+ 12033 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12035 "01010110" // /* MW 3 */
+ 12036 "11111110" // /* MW 2 */
+ 12037 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12039 "01110110" // /* MW 3 */
+ 12040 "01010110" // /* MW 2 */
+ 12041 "00000010" // /* MW 1 */
+ 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12043 "00000000" // /* MW 1 */
+ 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12045 "00000000" // /* MW 1 */
+ 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12047 "00000000" // /* MW 1 */
+ 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12049 "00000000" // /* MW 1 */
+ 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12051 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12053 "00010010" // /* MW 3 */
+ 12054 "10100011" // /* MW 2 */
+ 12055 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12057 "00110001" // /* MW 3 */
+ 12058 "00000110" // /* MW 2 */
+ 12059 "00001010" // /* MW 1 */
+ 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12061 "00000000" // /* MW 1 */
+ 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12063 "00000000" // /* MW 1 */
+ 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12065 "00000000" // /* MW 1 */
+ 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12067 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12069 "00001000" // /* MW 3 */
+ 12070 "11010011" // /* MW 2 */
+ 12071 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46
+.src_ref 7 "superkernels.cpp" 391 46
+.src_ref 1 "io_buffer_main.h" 324 32
+ 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12073 "01111001" // /* MW 9 */
+ 12074 "01100000" // /* MW 8 */
+ 12075 "11001110" // /* MW 7 */
+ 12076 "00101001" // /* MW 6 */
+ 12077 "00000000" // /* MW 5 */
+ 12078 "00000001" // /* MW 4 */
+ 12079 "01100000" // /* MW 3 */
+ 12080 "00010001" // /* MW 2 */
+ 12081 "11010001" // /* MW 1 */
+ 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12083 "00000000" // /* MW 1 */
+ 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12085 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+ 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12087 "00011001" // /* MW 3 */
+ 12088 "11101110" // /* MW 2 */
+ 12089 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 48 first
+ 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12091 "00111011" // /* MW 5 */
+ 12092 "11011000" // /* MW 4 */
+ 12093 "11011111" // /* MW 3 */
+ 12094 "11000110" // /* MW 2 */
+ 12095 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 48 first
+.src_ref 7 "superkernels.cpp" 393 2
+ 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12097 "10000001" // /* MW 5 */
+ 12098 "11011101" // /* MW 4 */
+ 12099 "11010110" // /* MW 3 */
+ 12100 "11010010" // /* MW 2 */
+ 12101 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12103 "01010110" // /* MW 3 */
+ 12104 "01001110" // /* MW 2 */
+ 12105 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12107 "00011110" // /* MW 3 */
+ 12108 "01011101" // /* MW 2 */
+ 12109 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12111 "11000000" // /* MW 3 */
+ 12112 "01100000" // /* MW 2 */
+ 12113 "00011111" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12115 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12117 "01110110" // /* MW 3 */
+ 12118 "00000110" // /* MW 2 */
+ 12119 "00000100" // /* MW 1 */
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12121 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 393 2 first
+.aggressive_scheduled_block_id 2
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 12123 "00000001" // /* MW 5 */
+ 12124 "00000000" // /* MW 4 */
+ 12125 "01011000" // /* MW 3 */
+ 12126 "00010110" // /* MW 2 */
+ 12127 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.delay_slot
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12129 "11000000" // /* MW 3 */
+ 12130 "11010100" // /* MW 2 */
+ 12131 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12133 "00001101" // /* MW 3 */
+ 12134 "01100011" // /* MW 2 */
+ 12135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46 first
+.delay_slot
+ 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12137 "00001101" // /* MW 3 */
+ 12138 "00100001" // /* MW 2 */
+ 12139 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 391 46
+.delay_slot
+ 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12141 "01000001" // /* MW 3 */
+ 12142 "01101001" // /* MW 2 */
+ 12143 "00011001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 390 46 first
+.delay_slot
+ 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12145 "00000000" // /* MW 15 */
+ 12146 "00000000" // /* MW 14 */
+ 12147 "10101000" // /* MW 13 */
+ 12148 "11100010" // /* MW 12 */
+ 12149 "00110100" // /* MW 11 */
+ 12150 "00000000" // /* MW 10 */
+ 12151 "00000000" // /* MW 9 */
+ 12152 "00000000" // /* MW 8 */
+ 12153 "01011011" // /* MW 7 */
+ 12154 "00000001" // /* MW 6 */
+ 12155 "00100000" // /* MW 5 */
+ 12156 "00000000" // /* MW 4 */
+ 12157 "11110000" // /* MW 3 */
+ 12158 "00101100" // /* MW 2 */
+ 12159 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32 first
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 40
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+.return_address
+ 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12161 "01111000" // /* MW 9 */
+ 12162 "11010000" // /* MW 8 */
+ 12163 "10110011" // /* MW 7 */
+ 12164 "00101000" // /* MW 6 */
+ 12165 "00000000" // /* MW 5 */
+ 12166 "00000001" // /* MW 4 */
+ 12167 "11010000" // /* MW 3 */
+ 12168 "11000110" // /* MW 2 */
+ 12169 "11001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19
+ 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12171 "11000000" // /* MW 5 */
+ 12172 "11001000" // /* MW 4 */
+ 12173 "11001100" // /* MW 3 */
+ 12174 "00000111" // /* MW 2 */
+ 12175 "00000000" // /* MW 1 */
+ 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12177 "00000000" // /* MW 1 */
+ 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12179 "00000000" // /* MW 1 */
+ 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12181 "00000000" // /* MW 1 */
+ 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12183 "00000000" // /* MW 1 */
+ 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12185 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12187 "00001000" // /* MW 3 */
+ 12188 "01010001" // /* MW 2 */
+ 12189 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12191 "00110110" // /* MW 3 */
+ 12192 "11110110" // /* MW 2 */
+ 12193 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+ 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12195 "00011001" // /* MW 3 */
+ 12196 "11101101" // /* MW 2 */
+ 12197 "00000111" // /* MW 1 */
+ 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12199 "00000000" // /* MW 1 */
+ 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12201 "00000000" // /* MW 1 */
+ 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12203 "00000000" // /* MW 1 */
+ 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12205 "00000000" // /* MW 1 */
+ 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12207 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32
+ 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12209 "00010001" // /* MW 3 */
+ 12210 "00100011" // /* MW 2 */
+ 12211 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12213 "01100011" // /* MW 5 */
+ 12214 "11101100" // /* MW 4 */
+ 12215 "11010011" // /* MW 3 */
+ 12216 "11000110" // /* MW 2 */
+ 12217 "01001010" // /* MW 1 */
+ 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12219 "00000000" // /* MW 1 */
+ 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12221 "00000000" // /* MW 1 */
+ 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12223 "00000000" // /* MW 1 */
+ 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12225 "00000000" // /* MW 1 */
+ 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12227 "00000000" // /* MW 1 */
+ 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12229 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12231 "00001000" // /* MW 3 */
+ 12232 "01010001" // /* MW 2 */
+ 12233 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+.src_ref 7 "superkernels.cpp" 398 14
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12235 "00010000" // /* MW 9 */
+ 12236 "00100000" // /* MW 8 */
+ 12237 "10110010" // /* MW 7 */
+ 12238 "11110000" // /* MW 6 */
+ 12239 "00000001" // /* MW 5 */
+ 12240 "00000000" // /* MW 4 */
+ 12241 "11010000" // /* MW 3 */
+ 12242 "11001110" // /* MW 2 */
+ 12243 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 19 first
+ 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12245 "01010110" // /* MW 3 */
+ 12246 "00000110" // /* MW 2 */
+ 12247 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12249 "00110110" // /* MW 3 */
+ 12250 "00000110" // /* MW 2 */
+ 12251 "00000001" // /* MW 1 */
+ 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12253 "00000000" // /* MW 1 */
+ 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12255 "00000000" // /* MW 1 */
+ 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12257 "00000000" // /* MW 1 */
+ 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12259 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12261 "00110001" // /* MW 3 */
+ 12262 "00100001" // /* MW 2 */
+ 12263 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12265 "00010001" // /* MW 3 */
+ 12266 "11100110" // /* MW 2 */
+ 12267 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 16 first
+ 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12269 "00101000" // /* MW 3 */
+ 12270 "01100001" // /* MW 2 */
+ 12271 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 397 6
+ 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */
+ 12273 "00000001" // /* MW 5 */
+ 12274 "01000000" // /* MW 4 */
+ 12275 "00001000" // /* MW 3 */
+ 12276 "00011000" // /* MW 2 */
+ 12277 "10000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12279 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12281 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12283 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12285 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14
+ 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12289 "00000001" // /* MW 3 */
+ 12290 "00100000" // /* MW 2 */
+ 12291 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 398 14 first
+ 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12293 "11000001" // /* MW 11 */
+ 12294 "00001000" // /* MW 10 */
+ 12295 "10000011" // /* MW 9 */
+ 12296 "00000000" // /* MW 8 */
+ 12297 "00000000" // /* MW 7 */
+ 12298 "00000000" // /* MW 6 */
+ 12299 "00100000" // /* MW 5 */
+ 12300 "00000000" // /* MW 4 */
+ 12301 "11110000" // /* MW 3 */
+ 12302 "00101100" // /* MW 2 */
+ 12303 "00000000" // /* MW 1 */
+.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560
+.src_ref 7 "superkernels.cpp" 400
+ 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12305 "00111001" // /* MW 3 */
+ 12306 "11110000" // /* MW 2 */
+ 12307 "00000111" // /* MW 1 */
+ 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12309 "11110001" // /* MW 3 */
+ 12310 "11111101" // /* MW 2 */
+ 12311 "00000111" // /* MW 1 */
+ 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12313 "10011001" // /* MW 3 */
+ 12314 "11110111" // /* MW 2 */
+ 12315 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12317 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12319 "11010001" // /* MW 3 */
+ 12320 "11111001" // /* MW 2 */
+ 12321 "00000111" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12323 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 12327 "00000000" // /* MW 3 */
+ 12328 "00101000" // /* MW 2 */
+ 12329 "00010000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12331 "00001011" // /* MW 3 */
+ 12332 "10001110" // /* MW 2 */
+ 12333 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 400
+.delay_slot
+ 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12335 "00000001" // /* MW 5 */
+ 12336 "00000000" // /* MW 4 */
+ 12337 "00000000" // /* MW 3 */
+ 12338 "11111000" // /* MW 2 */
+ 12339 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12341 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12343 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end
+.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0
+ 12345 "00000000" // /* MW 1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0
+.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh
+.src_ref 2 "conv2d_dw_bf16_params.h" 211 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.function_start
+ 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12353 "00010000" // /* MW 9 */
+ 12354 "11100000" // /* MW 8 */
+ 12355 "10110011" // /* MW 7 */
+ 12356 "11110000" // /* MW 6 */
+ 12357 "00000001" // /* MW 5 */
+ 12358 "00000000" // /* MW 4 */
+ 12359 "11010000" // /* MW 3 */
+ 12360 "10000101" // /* MW 2 */
+ 12361 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12363 "01011000" // /* MW 9 */
+ 12364 "00000000" // /* MW 8 */
+ 12365 "00001000" // /* MW 7 */
+ 12366 "01001011" // /* MW 6 */
+ 12367 "00000000" // /* MW 5 */
+ 12368 "00000001" // /* MW 4 */
+ 12369 "11010000" // /* MW 3 */
+ 12370 "10000001" // /* MW 2 */
+ 12371 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 211
+ 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12373 "00000001" // /* MW 5 */
+ 12374 "00000000" // /* MW 4 */
+ 12375 "00000000" // /* MW 3 */
+ 12376 "00001000" // /* MW 2 */
+ 12377 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32
+ 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12379 "00010001" // /* MW 9 */
+ 12380 "11100000" // /* MW 8 */
+ 12381 "10110011" // /* MW 7 */
+ 12382 "11110011" // /* MW 6 */
+ 12383 "00000001" // /* MW 5 */
+ 12384 "00000000" // /* MW 4 */
+ 12385 "10110000" // /* MW 3 */
+ 12386 "11110011" // /* MW 2 */
+ 12387 "11111110" // /* MW 1 */
+ 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12389 "00111101" // /* MW 3 */
+ 12390 "11111100" // /* MW 2 */
+ 12391 "00001111" // /* MW 1 */
+ 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12393 "11110101" // /* MW 3 */
+ 12394 "11111001" // /* MW 2 */
+ 12395 "00001111" // /* MW 1 */
+ 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12397 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12399 "00101001" // /* MW 3 */
+ 12400 "00011100" // /* MW 2 */
+ 12401 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12403 "00001001" // /* MW 3 */
+ 12404 "00011100" // /* MW 2 */
+ 12405 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12407 "00101110" // /* MW 3 */
+ 12408 "00000100" // /* MW 2 */
+ 12409 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 17
+ 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12411 "00001110" // /* MW 3 */
+ 12412 "00010100" // /* MW 2 */
+ 12413 "00000000" // /* MW 1 */
+ 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12415 "00000000" // /* MW 1 */
+ 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12417 "00000000" // /* MW 1 */
+ 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12419 "00000000" // /* MW 1 */
+ 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12421 "00000000" // /* MW 1 */
+ 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12423 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12425 "00101001" // /* MW 3 */
+ 12426 "00000100" // /* MW 2 */
+ 12427 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 215 15
+ 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12429 "00001001" // /* MW 3 */
+ 12430 "00010100" // /* MW 2 */
+ 12431 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first
+ 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12433 "00101010" // /* MW 3 */
+ 12434 "01011110" // /* MW 2 */
+ 12435 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 52
+ 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12437 "01001010" // /* MW 3 */
+ 12438 "11101110" // /* MW 2 */
+ 12439 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+ 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12441 "00101010" // /* MW 3 */
+ 12442 "11101100" // /* MW 2 */
+ 12443 "00000111" // /* MW 1 */
+ 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12445 "00000000" // /* MW 1 */
+ 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12447 "00000000" // /* MW 1 */
+ 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12449 "00000000" // /* MW 1 */
+ 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12451 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.no_stack_arguments
+ 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12453 "00000001" // /* MW 5 */
+ 12454 "00000000" // /* MW 4 */
+ 12455 "10011000" // /* MW 3 */
+ 12456 "00011110" // /* MW 2 */
+ 12457 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 38
+.delay_slot
+ 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12459 "01000011" // /* MW 5 */
+ 12460 "10111110" // /* MW 4 */
+ 12461 "10111000" // /* MW 3 */
+ 12462 "11001010" // /* MW 2 */
+ 12463 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 80
+.delay_slot
+ 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12465 "00010001" // /* MW 5 */
+ 12466 "11000010" // /* MW 4 */
+ 12467 "10110000" // /* MW 3 */
+ 12468 "10000110" // /* MW 2 */
+ 12469 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12471 "00010101" // /* MW 5 */
+ 12472 "11101111" // /* MW 4 */
+ 12473 "10110111" // /* MW 3 */
+ 12474 "01000010" // /* MW 2 */
+ 12475 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12477 "11110001" // /* MW 3 */
+ 12478 "00100010" // /* MW 2 */
+ 12479 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.delay_slot
+ 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 12481 "00000000" // /* MW 15 */
+ 12482 "00000000" // /* MW 14 */
+ 12483 "01111000" // /* MW 13 */
+ 12484 "10100101" // /* MW 12 */
+ 12485 "00000001" // /* MW 11 */
+ 12486 "10010000" // /* MW 10 */
+ 12487 "00001000" // /* MW 9 */
+ 12488 "00011110" // /* MW 8 */
+ 12489 "01011011" // /* MW 7 */
+ 12490 "00000001" // /* MW 6 */
+ 12491 "00100000" // /* MW 5 */
+ 12492 "00000000" // /* MW 4 */
+ 12493 "11110000" // /* MW 3 */
+ 12494 "00101100" // /* MW 2 */
+ 12495 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.return_address
+ 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12497 "00000010" // /* MW 5 */
+ 12498 "01000000" // /* MW 4 */
+ 12499 "00100000" // /* MW 3 */
+ 12500 "11010010" // /* MW 2 */
+ 12501 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first
+ 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12503 "01000011" // /* MW 5 */
+ 12504 "01001000" // /* MW 4 */
+ 12505 "01011000" // /* MW 3 */
+ 12506 "11000101" // /* MW 2 */
+ 12507 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 52
+ 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12509 "01101010" // /* MW 3 */
+ 12510 "11101110" // /* MW 2 */
+ 12511 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12513 "00110001" // /* MW 3 */
+ 12514 "11101100" // /* MW 2 */
+ 12515 "00000111" // /* MW 1 */
+ 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12517 "00000000" // /* MW 1 */
+ 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12519 "00000000" // /* MW 1 */
+ 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12521 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+ 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12523 "01000110" // /* MW 3 */
+ 12524 "11101001" // /* MW 2 */
+ 12525 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66
+ 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12527 "00001010" // /* MW 3 */
+ 12528 "00110111" // /* MW 2 */
+ 12529 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first
+ 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12531 "01100011" // /* MW 5 */
+ 12532 "11000110" // /* MW 4 */
+ 12533 "10111000" // /* MW 3 */
+ 12534 "01001110" // /* MW 2 */
+ 12535 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.no_stack_arguments
+ 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */
+ 12537 "01000001" // /* MW 9 */
+ 12538 "00000000" // /* MW 8 */
+ 12539 "00000000" // /* MW 7 */
+ 12540 "10100110" // /* MW 6 */
+ 12541 "00000111" // /* MW 5 */
+ 12542 "00000000" // /* MW 4 */
+ 12543 "10110000" // /* MW 3 */
+ 12544 "01000110" // /* MW 2 */
+ 12545 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12547 "00100010" // /* MW 3 */
+ 12548 "10101001" // /* MW 2 */
+ 12549 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12551 "00001010" // /* MW 3 */
+ 12552 "01110111" // /* MW 2 */
+ 12553 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.delay_slot
+ 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12555 "00010001" // /* MW 3 */
+ 12556 "00100101" // /* MW 2 */
+ 12557 "00010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first
+.delay_slot
+ 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12559 "01110000" // /* MW 3 */
+ 12560 "00100110" // /* MW 2 */
+ 12561 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 218 87
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+.delay_slot
+ 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 12563 "01100000" // /* MW 13 */
+ 12564 "00101011" // /* MW 12 */
+ 12565 "00000000" // /* MW 11 */
+ 12566 "00001001" // /* MW 10 */
+ 12567 "10011000" // /* MW 9 */
+ 12568 "00111101" // /* MW 8 */
+ 12569 "00100010" // /* MW 7 */
+ 12570 "01000001" // /* MW 6 */
+ 12571 "00100100" // /* MW 5 */
+ 12572 "00000000" // /* MW 4 */
+ 12573 "11110000" // /* MW 3 */
+ 12574 "00101100" // /* MW 2 */
+ 12575 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+.return_address
+ 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12577 "01011000" // /* MW 9 */
+ 12578 "01000010" // /* MW 8 */
+ 12579 "00000000" // /* MW 7 */
+ 12580 "11001000" // /* MW 6 */
+ 12581 "00110111" // /* MW 5 */
+ 12582 "00111111" // /* MW 4 */
+ 12583 "00100000" // /* MW 3 */
+ 12584 "00001110" // /* MW 2 */
+ 12585 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12587 "01011000" // /* MW 9 */
+ 12588 "11111100" // /* MW 8 */
+ 12589 "00101001" // /* MW 7 */
+ 12590 "00001000" // /* MW 6 */
+ 12591 "10000000" // /* MW 5 */
+ 12592 "00000001" // /* MW 4 */
+ 12593 "00100000" // /* MW 3 */
+ 12594 "11000010" // /* MW 2 */
+ 12595 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53
+ 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12597 "01011000" // /* MW 9 */
+ 12598 "00000010" // /* MW 8 */
+ 12599 "10001000" // /* MW 7 */
+ 12600 "10001000" // /* MW 6 */
+ 12601 "01100000" // /* MW 5 */
+ 12602 "00000000" // /* MW 4 */
+ 12603 "00100000" // /* MW 3 */
+ 12604 "11011010" // /* MW 2 */
+ 12605 "11111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+ 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12607 "01011000" // /* MW 9 */
+ 12608 "00010111" // /* MW 8 */
+ 12609 "10001000" // /* MW 7 */
+ 12610 "00001011" // /* MW 6 */
+ 12611 "01010001" // /* MW 5 */
+ 12612 "00000000" // /* MW 4 */
+ 12613 "01010000" // /* MW 3 */
+ 12614 "01000101" // /* MW 2 */
+ 12615 "11100001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76
+ 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12617 "01011000" // /* MW 9 */
+ 12618 "00100000" // /* MW 8 */
+ 12619 "10000000" // /* MW 7 */
+ 12620 "01001000" // /* MW 6 */
+ 12621 "00100111" // /* MW 5 */
+ 12622 "00111111" // /* MW 4 */
+ 12623 "00100000" // /* MW 3 */
+ 12624 "01010110" // /* MW 2 */
+ 12625 "11111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12627 "01011000" // /* MW 9 */
+ 12628 "00000001" // /* MW 8 */
+ 12629 "01001000" // /* MW 7 */
+ 12630 "11001011" // /* MW 6 */
+ 12631 "01110000" // /* MW 5 */
+ 12632 "00000001" // /* MW 4 */
+ 12633 "00100000" // /* MW 3 */
+ 12634 "01111010" // /* MW 2 */
+ 12635 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41
+ 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12637 "01011000" // /* MW 9 */
+ 12638 "11000000" // /* MW 8 */
+ 12639 "11101111" // /* MW 7 */
+ 12640 "00001011" // /* MW 6 */
+ 12641 "11010000" // /* MW 5 */
+ 12642 "00000101" // /* MW 4 */
+ 12643 "10000000" // /* MW 3 */
+ 12644 "11000000" // /* MW 2 */
+ 12645 "11101001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12647 "00100001" // /* MW 3 */
+ 12648 "00101000" // /* MW 2 */
+ 12649 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+ 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12651 "00000110" // /* MW 3 */
+ 12652 "11000111" // /* MW 2 */
+ 12653 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 74
+ 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12655 "00000010" // /* MW 5 */
+ 12656 "00110110" // /* MW 4 */
+ 12657 "01010000" // /* MW 3 */
+ 12658 "11110001" // /* MW 2 */
+ 12659 "00011110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69
+ 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12661 "11110101" // /* MW 5 */
+ 12662 "00111111" // /* MW 4 */
+ 12663 "01001011" // /* MW 3 */
+ 12664 "00101000" // /* MW 2 */
+ 12665 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12667 "00011101" // /* MW 5 */
+ 12668 "00100000" // /* MW 4 */
+ 12669 "11110001" // /* MW 3 */
+ 12670 "11100001" // /* MW 2 */
+ 12671 "01111000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first
+ 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12673 "01110000" // /* MW 3 */
+ 12674 "00101000" // /* MW 2 */
+ 12675 "00010101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68
+ 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12677 "00000001" // /* MW 5 */
+ 12678 "10100000" // /* MW 4 */
+ 12679 "10010000" // /* MW 3 */
+ 12680 "00000000" // /* MW 2 */
+ 12681 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first
+ 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12683 "00000001" // /* MW 5 */
+ 12684 "10110100" // /* MW 4 */
+ 12685 "10111101" // /* MW 3 */
+ 12686 "11100111" // /* MW 2 */
+ 12687 "00001100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first
+ 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12689 "00000010" // /* MW 5 */
+ 12690 "10100011" // /* MW 4 */
+ 12691 "10110000" // /* MW 3 */
+ 12692 "00001101" // /* MW 2 */
+ 12693 "01111101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 70
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first
+ 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12695 "11111111" // /* MW 5 */
+ 12696 "00110101" // /* MW 4 */
+ 12697 "10110000" // /* MW 3 */
+ 12698 "11001101" // /* MW 2 */
+ 12699 "00001001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first
+ 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12701 "00001111" // /* MW 3 */
+ 12702 "11001101" // /* MW 2 */
+ 12703 "00010110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first
+ 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12705 "00011111" // /* MW 3 */
+ 12706 "11011111" // /* MW 2 */
+ 12707 "00010011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 79
+ 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12709 "11111111" // /* MW 5 */
+ 12710 "10110011" // /* MW 4 */
+ 12711 "11111001" // /* MW 3 */
+ 12712 "01101011" // /* MW 2 */
+ 12713 "10011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first
+ 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12715 "00000111" // /* MW 3 */
+ 12716 "00110111" // /* MW 2 */
+ 12717 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first
+ 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12719 "11011111" // /* MW 5 */
+ 12720 "10010000" // /* MW 4 */
+ 12721 "00110111" // /* MW 3 */
+ 12722 "11010110" // /* MW 2 */
+ 12723 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+ 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12725 "01010010" // /* MW 3 */
+ 12726 "00111000" // /* MW 2 */
+ 12727 "00010111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first
+ 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12729 "00101101" // /* MW 3 */
+ 12730 "00100101" // /* MW 2 */
+ 12731 "00010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 232 39
+ 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12733 "00111111" // /* MW 5 */
+ 12734 "11001000" // /* MW 4 */
+ 12735 "00111000" // /* MW 3 */
+ 12736 "01001010" // /* MW 2 */
+ 12737 "11100101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first
+ 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12739 "11111011" // /* MW 5 */
+ 12740 "01110010" // /* MW 4 */
+ 12741 "00111111" // /* MW 3 */
+ 12742 "11110010" // /* MW 2 */
+ 12743 "11111001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 235 47
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first
+ 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12745 "00011111" // /* MW 5 */
+ 12746 "01110000" // /* MW 4 */
+ 12747 "00111001" // /* MW 3 */
+ 12748 "11110010" // /* MW 2 */
+ 12749 "11101101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first
+ 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12751 "11111011" // /* MW 5 */
+ 12752 "11001110" // /* MW 4 */
+ 12753 "00111001" // /* MW 3 */
+ 12754 "11001110" // /* MW 2 */
+ 12755 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 71
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first
+ 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12757 "11101010" // /* MW 5 */
+ 12758 "10110011" // /* MW 4 */
+ 12759 "10111001" // /* MW 3 */
+ 12760 "00110101" // /* MW 2 */
+ 12761 "11100111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first
+ 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12763 "01011011" // /* MW 5 */
+ 12764 "01111011" // /* MW 4 */
+ 12765 "00111001" // /* MW 3 */
+ 12766 "11111110" // /* MW 2 */
+ 12767 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12769 "11100010" // /* MW 5 */
+ 12770 "00110011" // /* MW 4 */
+ 12771 "11111001" // /* MW 3 */
+ 12772 "00100001" // /* MW 2 */
+ 12773 "10010100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first
+ 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12775 "00000100" // /* MW 5 */
+ 12776 "11110011" // /* MW 4 */
+ 12777 "00111111" // /* MW 3 */
+ 12778 "10000010" // /* MW 2 */
+ 12779 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first
+ 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12781 "01101101" // /* MW 3 */
+ 12782 "11111111" // /* MW 2 */
+ 12783 "00010000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 73
+ 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12785 "11111111" // /* MW 5 */
+ 12786 "10111111" // /* MW 4 */
+ 12787 "00111001" // /* MW 3 */
+ 12788 "01100110" // /* MW 2 */
+ 12789 "11110000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+ 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12791 "11011011" // /* MW 5 */
+ 12792 "11000110" // /* MW 4 */
+ 12793 "00111000" // /* MW 3 */
+ 12794 "10000110" // /* MW 2 */
+ 12795 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 100
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first
+ 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12797 "11111111" // /* MW 5 */
+ 12798 "00110001" // /* MW 4 */
+ 12799 "00111001" // /* MW 3 */
+ 12800 "10100100" // /* MW 2 */
+ 12801 "11000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12803 "11000011" // /* MW 5 */
+ 12804 "11011011" // /* MW 4 */
+ 12805 "00110011" // /* MW 3 */
+ 12806 "11011010" // /* MW 2 */
+ 12807 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first
+ 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12809 "01011011" // /* MW 5 */
+ 12810 "01000011" // /* MW 4 */
+ 12811 "00111000" // /* MW 3 */
+ 12812 "11001010" // /* MW 2 */
+ 12813 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12815 "01011011" // /* MW 5 */
+ 12816 "11111100" // /* MW 4 */
+ 12817 "00111001" // /* MW 3 */
+ 12818 "10011110" // /* MW 2 */
+ 12819 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first
+ 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12821 "11000001" // /* MW 5 */
+ 12822 "11011010" // /* MW 4 */
+ 12823 "00111110" // /* MW 3 */
+ 12824 "11001110" // /* MW 2 */
+ 12825 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first
+ 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12827 "11110010" // /* MW 5 */
+ 12828 "10111111" // /* MW 4 */
+ 12829 "00011110" // /* MW 3 */
+ 12830 "00100000" // /* MW 2 */
+ 12831 "00111100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 250 61
+ 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12833 "10100011" // /* MW 5 */
+ 12834 "01000011" // /* MW 4 */
+ 12835 "00111000" // /* MW 3 */
+ 12836 "11011010" // /* MW 2 */
+ 12837 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140
+ 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12839 "01011001" // /* MW 9 */
+ 12840 "11111111" // /* MW 8 */
+ 12841 "00001111" // /* MW 7 */
+ 12842 "01101110" // /* MW 6 */
+ 12843 "01101101" // /* MW 5 */
+ 12844 "00011111" // /* MW 4 */
+ 12845 "00110000" // /* MW 3 */
+ 12846 "11000010" // /* MW 2 */
+ 12847 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first
+ 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12849 "10000001" // /* MW 5 */
+ 12850 "01101010" // /* MW 4 */
+ 12851 "00111110" // /* MW 3 */
+ 12852 "11001010" // /* MW 2 */
+ 12853 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 253 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first
+ 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12855 "11000011" // /* MW 5 */
+ 12856 "01010010" // /* MW 4 */
+ 12857 "00111010" // /* MW 3 */
+ 12858 "11101010" // /* MW 2 */
+ 12859 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 73
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41
+ 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 12861 "00001000" // /* MW 11 */
+ 12862 "00010000" // /* MW 10 */
+ 12863 "01101101" // /* MW 9 */
+ 12864 "10110010" // /* MW 8 */
+ 12865 "00001000" // /* MW 7 */
+ 12866 "10101011" // /* MW 6 */
+ 12867 "01110001" // /* MW 5 */
+ 12868 "00011110" // /* MW 4 */
+ 12869 "00000111" // /* MW 3 */
+ 12870 "00010001" // /* MW 2 */
+ 12871 "00001000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first
+ 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12873 "01110001" // /* MW 3 */
+ 12874 "00011110" // /* MW 2 */
+ 12875 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first
+ 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12877 "11111011" // /* MW 5 */
+ 12878 "01010010" // /* MW 4 */
+ 12879 "00111000" // /* MW 3 */
+ 12880 "11000110" // /* MW 2 */
+ 12881 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 49
+ 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12883 "10000011" // /* MW 5 */
+ 12884 "01000010" // /* MW 4 */
+ 12885 "00111100" // /* MW 3 */
+ 12886 "11000010" // /* MW 2 */
+ 12887 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first
+ 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12889 "11111011" // /* MW 5 */
+ 12890 "01010010" // /* MW 4 */
+ 12891 "00111001" // /* MW 3 */
+ 12892 "11000110" // /* MW 2 */
+ 12893 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 47
+ 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12895 "10000011" // /* MW 5 */
+ 12896 "01000010" // /* MW 4 */
+ 12897 "00111100" // /* MW 3 */
+ 12898 "11000010" // /* MW 2 */
+ 12899 "11100011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first
+ 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12901 "01010001" // /* MW 3 */
+ 12902 "00011110" // /* MW 2 */
+ 12903 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first
+ 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12905 "00110001" // /* MW 3 */
+ 12906 "00011110" // /* MW 2 */
+ 12907 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first
+ 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12909 "00010001" // /* MW 3 */
+ 12910 "00001010" // /* MW 2 */
+ 12911 "00001111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first
+ 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12913 "00001010" // /* MW 3 */
+ 12914 "00000110" // /* MW 2 */
+ 12915 "00000111" // /* MW 1 */
+ 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12917 "00000000" // /* MW 1 */
+ 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12919 "00000000" // /* MW 1 */
+ 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12921 "00000000" // /* MW 1 */
+ 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12923 "00000000" // /* MW 1 */
+ 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12925 "00000000" // /* MW 1 */
+ 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12927 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 58
+ 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */
+ 12929 "00000001" // /* MW 5 */
+ 12930 "00000000" // /* MW 4 */
+ 12931 "01010000" // /* MW 3 */
+ 12932 "00011001" // /* MW 2 */
+ 12933 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12935 "01100000" // /* MW 3 */
+ 12936 "00111011" // /* MW 2 */
+ 12937 "00011001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+ 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12939 "00000000" // /* MW 5 */
+ 12940 "10100000" // /* MW 4 */
+ 12941 "00001001" // /* MW 3 */
+ 12942 "01111111" // /* MW 2 */
+ 12943 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12945 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12947 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12949 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12951 "00000001" // /* MW 9 */
+ 12952 "00100110" // /* MW 8 */
+ 12953 "00000000" // /* MW 7 */
+ 12954 "00000000" // /* MW 6 */
+ 12955 "01011011" // /* MW 5 */
+ 12956 "00000001" // /* MW 4 */
+ 12957 "11110000" // /* MW 3 */
+ 12958 "00101100" // /* MW 2 */
+ 12959 "00000000" // /* MW 1 */
+.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267
+ 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 12961 "00010000" // /* MW 9 */
+ 12962 "00110100" // /* MW 8 */
+ 12963 "00110010" // /* MW 7 */
+ 12964 "11110000" // /* MW 6 */
+ 12965 "00000001" // /* MW 5 */
+ 12966 "00000000" // /* MW 4 */
+ 12967 "00100000" // /* MW 3 */
+ 12968 "10000111" // /* MW 2 */
+ 12969 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12971 "11100010" // /* MW 5 */
+ 12972 "00000100" // /* MW 4 */
+ 12973 "01010000" // /* MW 3 */
+ 12974 "11000000" // /* MW 2 */
+ 12975 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39
+ 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12977 "11101001" // /* MW 5 */
+ 12978 "00000010" // /* MW 4 */
+ 12979 "00100001" // /* MW 3 */
+ 12980 "10000011" // /* MW 2 */
+ 12981 "11111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+ 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12983 "00100101" // /* MW 5 */
+ 12984 "00000001" // /* MW 4 */
+ 12985 "00100000" // /* MW 3 */
+ 12986 "00111110" // /* MW 2 */
+ 12987 "11111111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+ 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 12989 "00000001" // /* MW 5 */
+ 12990 "00000000" // /* MW 4 */
+ 12991 "00000000" // /* MW 3 */
+ 12992 "11111000" // /* MW 2 */
+ 12993 "11111111" // /* MW 1 */
+ 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12995 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 12997 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 12999 "00010111" // /* MW 3 */
+ 13000 "00000010" // /* MW 2 */
+ 13001 "00000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.src_ref 2 "conv2d_dw_bf16_params.h" 267 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13003 "01000001" // /* MW 5 */
+ 13004 "01110000" // /* MW 4 */
+ 13005 "00001111" // /* MW 3 */
+ 13006 "00000000" // /* MW 2 */
+ 13007 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13009 "00010110" // /* MW 3 */
+ 13010 "01000000" // /* MW 2 */
+ 13011 "00001000" // /* MW 1 */
+.delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13013 "11000000" // /* MW 3 */
+ 13014 "01100000" // /* MW 2 */
+ 13015 "00011111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16_params.h" 266 41
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13017 "00000001" // /* MW 3 */
+ 13018 "00000001" // /* MW 2 */
+ 13019 "00011100" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13021 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end
+.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0
+ 13023 "00000000" // /* MW 1 */
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 2 "conv2d_dw_bf16.h" 199 first
+.function_start
+ 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13025 "11000000" // /* MW 3 */
+ 13026 "01010110" // /* MW 2 */
+ 13027 "00011100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 82
+ 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13029 "10101001" // /* MW 5 */
+ 13030 "00000001" // /* MW 4 */
+ 13031 "11011110" // /* MW 3 */
+ 13032 "10010011" // /* MW 2 */
+ 13033 "00100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 12
+.src_ref 1 "io_buffer_main.h" 125 25
+ 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13035 "00000010" // /* MW 5 */
+ 13036 "11010001" // /* MW 4 */
+ 13037 "11010110" // /* MW 3 */
+ 13038 "10000011" // /* MW 2 */
+ 13039 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 204 82 first
+ 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13041 "10001010" // /* MW 3 */
+ 13042 "11101000" // /* MW 2 */
+ 13043 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4 first
+ 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13045 "01000110" // /* MW 3 */
+ 13046 "11111101" // /* MW 2 */
+ 13047 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13049 "00100110" // /* MW 3 */
+ 13050 "00111101" // /* MW 2 */
+ 13051 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13053 "01000110" // /* MW 3 */
+ 13054 "11111111" // /* MW 2 */
+ 13055 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13057 "00100110" // /* MW 3 */
+ 13058 "00101111" // /* MW 2 */
+ 13059 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 221 4
+ 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13061 "00000110" // /* MW 3 */
+ 13062 "00101101" // /* MW 2 */
+ 13063 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4 first
+ 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13065 "01000110" // /* MW 3 */
+ 13066 "11111100" // /* MW 2 */
+ 13067 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13069 "00100110" // /* MW 3 */
+ 13070 "00111100" // /* MW 2 */
+ 13071 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13073 "01000110" // /* MW 3 */
+ 13074 "11111110" // /* MW 2 */
+ 13075 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13077 "00100110" // /* MW 3 */
+ 13078 "00101110" // /* MW 2 */
+ 13079 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 222 4
+ 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13081 "00000110" // /* MW 3 */
+ 13082 "00101100" // /* MW 2 */
+ 13083 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4 first
+ 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13085 "11000110" // /* MW 3 */
+ 13086 "11111100" // /* MW 2 */
+ 13087 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13089 "10100110" // /* MW 3 */
+ 13090 "00111100" // /* MW 2 */
+ 13091 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13093 "11000110" // /* MW 3 */
+ 13094 "11111110" // /* MW 2 */
+ 13095 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13097 "10100110" // /* MW 3 */
+ 13098 "00101110" // /* MW 2 */
+ 13099 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 223 4
+ 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13101 "10000110" // /* MW 3 */
+ 13102 "00101100" // /* MW 2 */
+ 13103 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4 first
+ 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13105 "11000110" // /* MW 3 */
+ 13106 "11111111" // /* MW 2 */
+ 13107 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+ 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13109 "10100110" // /* MW 3 */
+ 13110 "00101111" // /* MW 2 */
+ 13111 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 224 4
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13113 "00010000" // /* MW 9 */
+ 13114 "00110100" // /* MW 8 */
+ 13115 "00110010" // /* MW 7 */
+ 13116 "11110010" // /* MW 6 */
+ 13117 "00000001" // /* MW 5 */
+ 13118 "00000000" // /* MW 4 */
+ 13119 "11010000" // /* MW 3 */
+ 13120 "11110000" // /* MW 2 */
+ 13121 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 5 "accum.hpp" 946 89
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13123 "10000001" // /* MW 5 */
+ 13124 "11000101" // /* MW 4 */
+ 13125 "01011000" // /* MW 3 */
+ 13126 "10011000" // /* MW 2 */
+ 13127 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13129 "00010000" // /* MW 3 */
+ 13130 "00001111" // /* MW 2 */
+ 13131 "00011011" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+ 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13133 "01011000" // /* MW 11 */
+ 13134 "00000000" // /* MW 10 */
+ 13135 "01100000" // /* MW 9 */
+ 13136 "01101010" // /* MW 8 */
+ 13137 "00100000" // /* MW 7 */
+ 13138 "00000000" // /* MW 6 */
+ 13139 "01101000" // /* MW 5 */
+ 13140 "00111011" // /* MW 4 */
+ 13141 "01110000" // /* MW 3 */
+ 13142 "10000101" // /* MW 2 */
+ 13143 "10000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43 first
+.src_ref 2 "conv2d_dw_bf16.h" 225 4 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 13145 "01100000" // /* MW 13 */
+ 13146 "00001001" // /* MW 12 */
+ 13147 "01100010" // /* MW 11 */
+ 13148 "00001011" // /* MW 10 */
+ 13149 "00010000" // /* MW 9 */
+ 13150 "11100000" // /* MW 8 */
+ 13151 "00101101" // /* MW 7 */
+ 13152 "00000100" // /* MW 6 */
+ 13153 "11101001" // /* MW 5 */
+ 13154 "00111000" // /* MW 4 */
+ 13155 "11010000" // /* MW 3 */
+ 13156 "10111000" // /* MW 2 */
+ 13157 "01111111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13159 "01110010" // /* MW 9 */
+ 13160 "10010000" // /* MW 8 */
+ 13161 "10000000" // /* MW 7 */
+ 13162 "00000010" // /* MW 6 */
+ 13163 "01001011" // /* MW 5 */
+ 13164 "00001100" // /* MW 4 */
+ 13165 "11010001" // /* MW 3 */
+ 13166 "10110100" // /* MW 2 */
+ 13167 "01100101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 204 43
+.src_ref 2 "conv2d_dw_bf16.h" 225 4
+ 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13169 "01111110" // /* MW 9 */
+ 13170 "11000000" // /* MW 8 */
+ 13171 "11100001" // /* MW 7 */
+ 13172 "00000011" // /* MW 6 */
+ 13173 "10010000" // /* MW 5 */
+ 13174 "10101011" // /* MW 4 */
+ 13175 "11010001" // /* MW 3 */
+ 13176 "00110000" // /* MW 2 */
+ 13177 "01101101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+ 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13179 "01011110" // /* MW 9 */
+ 13180 "10010000" // /* MW 8 */
+ 13181 "00000111" // /* MW 7 */
+ 13182 "00000010" // /* MW 6 */
+ 13183 "11110100" // /* MW 5 */
+ 13184 "11110000" // /* MW 4 */
+ 13185 "11010001" // /* MW 3 */
+ 13186 "00001010" // /* MW 2 */
+ 13187 "01111001" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 244 56
+ 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13189 "10000010" // /* MW 5 */
+ 13190 "00000000" // /* MW 4 */
+ 13191 "01010000" // /* MW 3 */
+ 13192 "00011110" // /* MW 2 */
+ 13193 "01110001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+ 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13195 "00010000" // /* MW 11 */
+ 13196 "11111000" // /* MW 10 */
+ 13197 "01111001" // /* MW 9 */
+ 13198 "00001100" // /* MW 8 */
+ 13199 "00000000" // /* MW 7 */
+ 13200 "00000000" // /* MW 6 */
+ 13201 "01001011" // /* MW 5 */
+ 13202 "00010000" // /* MW 4 */
+ 13203 "11010110" // /* MW 3 */
+ 13204 "11000000" // /* MW 2 */
+ 13205 "01101001" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 244 56 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+ 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13207 "00010000" // /* MW 11 */
+ 13208 "00101000" // /* MW 10 */
+ 13209 "10111010" // /* MW 9 */
+ 13210 "00001101" // /* MW 8 */
+ 13211 "00000000" // /* MW 7 */
+ 13212 "00000000" // /* MW 6 */
+ 13213 "01001011" // /* MW 5 */
+ 13214 "00010000" // /* MW 4 */
+ 13215 "11010010" // /* MW 3 */
+ 13216 "10010010" // /* MW 2 */
+ 13217 "01110010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13219 "00000101" // /* MW 5 */
+ 13220 "01100001" // /* MW 4 */
+ 13221 "10000100" // /* MW 3 */
+ 13222 "00010110" // /* MW 2 */
+ 13223 "00001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+ 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13225 "10001010" // /* MW 3 */
+ 13226 "00000000" // /* MW 2 */
+ 13227 "00011011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 244 4
+ 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13229 "00010000" // /* MW 9 */
+ 13230 "01001000" // /* MW 8 */
+ 13231 "10110010" // /* MW 7 */
+ 13232 "00001101" // /* MW 6 */
+ 13233 "00000000" // /* MW 5 */
+ 13234 "00000000" // /* MW 4 */
+ 13235 "11010000" // /* MW 3 */
+ 13236 "10010110" // /* MW 2 */
+ 13237 "01100000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+ 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13239 "10101000" // /* MW 9 */
+ 13240 "00000001" // /* MW 8 */
+ 13241 "10001110" // /* MW 7 */
+ 13242 "00001010" // /* MW 6 */
+ 13243 "00010100" // /* MW 5 */
+ 13244 "00000000" // /* MW 4 */
+ 13245 "11110000" // /* MW 3 */
+ 13246 "00101100" // /* MW 2 */
+ 13247 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.src_ref 2 "conv2d_dw_bf16.h" 271 12
+.src_ref 2 "conv2d_dw_bf16.h" 272 12
+.src_ref 2 "conv2d_dw_bf16.h" 273 12
+.src_ref 2 "conv2d_dw_bf16.h" 274 12
+.src_ref 2 "conv2d_dw_bf16.h" 275 12
+.src_ref 2 "conv2d_dw_bf16.h" 276 12
+.src_ref 2 "conv2d_dw_bf16.h" 277 12
+ 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13249 "00000000" // /* MW 15 */
+ 13250 "00000000" // /* MW 14 */
+ 13251 "01111000" // /* MW 13 */
+ 13252 "10111001" // /* MW 12 */
+ 13253 "00001110" // /* MW 11 */
+ 13254 "00001000" // /* MW 10 */
+ 13255 "00110110" // /* MW 9 */
+ 13256 "00000000" // /* MW 8 */
+ 13257 "01011011" // /* MW 7 */
+ 13258 "00000001" // /* MW 6 */
+ 13259 "00100000" // /* MW 5 */
+ 13260 "00000000" // /* MW 4 */
+ 13261 "00000000" // /* MW 3 */
+ 13262 "10010001" // /* MW 2 */
+ 13263 "00000111" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13265 "01101010" // /* MW 15 */
+ 13266 "01100011" // /* MW 14 */
+ 13267 "10101100" // /* MW 13 */
+ 13268 "00000011" // /* MW 12 */
+ 13269 "00001110" // /* MW 11 */
+ 13270 "00000010" // /* MW 10 */
+ 13271 "11010100" // /* MW 9 */
+ 13272 "00001101" // /* MW 8 */
+ 13273 "01001011" // /* MW 7 */
+ 13274 "00010000" // /* MW 6 */
+ 13275 "00100000" // /* MW 5 */
+ 13276 "00000000" // /* MW 4 */
+ 13277 "11110000" // /* MW 3 */
+ 13278 "00101100" // /* MW 2 */
+ 13279 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.src_ref 4 "vector.hpp" 1159 33
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13281 "00011010" // /* MW 15 */
+ 13282 "01001000" // /* MW 14 */
+ 13283 "11001100" // /* MW 13 */
+ 13284 "00111111" // /* MW 12 */
+ 13285 "10111001" // /* MW 11 */
+ 13286 "11011010" // /* MW 10 */
+ 13287 "00101111" // /* MW 9 */
+ 13288 "00000100" // /* MW 8 */
+ 13289 "01001011" // /* MW 7 */
+ 13290 "00010000" // /* MW 6 */
+ 13291 "00100101" // /* MW 5 */
+ 13292 "00000000" // /* MW 4 */
+ 13293 "11010000" // /* MW 3 */
+ 13294 "10100011" // /* MW 2 */
+ 13295 "01000000" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.begin_of_loop
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+.loop_nesting 1
+ 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13297 "01101110" // /* MW 9 */
+ 13298 "10000001" // /* MW 8 */
+ 13299 "10000100" // /* MW 7 */
+ 13300 "00000010" // /* MW 6 */
+ 13301 "11110100" // /* MW 5 */
+ 13302 "11110000" // /* MW 4 */
+ 13303 "01110001" // /* MW 3 */
+ 13304 "10110011" // /* MW 2 */
+ 13305 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13307 "00000001" // /* MW 9 */
+ 13308 "10001001" // /* MW 8 */
+ 13309 "10001010" // /* MW 7 */
+ 13310 "01000110" // /* MW 6 */
+ 13311 "00001011" // /* MW 5 */
+ 13312 "10011100" // /* MW 4 */
+ 13313 "11101010" // /* MW 3 */
+ 13314 "00111000" // /* MW 2 */
+ 13315 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13317 "00000001" // /* MW 9 */
+ 13318 "00110101" // /* MW 8 */
+ 13319 "10001001" // /* MW 7 */
+ 13320 "11000110" // /* MW 6 */
+ 13321 "10000110" // /* MW 5 */
+ 13322 "00110000" // /* MW 4 */
+ 13323 "01101010" // /* MW 3 */
+ 13324 "10110001" // /* MW 2 */
+ 13325 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13327 "00000110" // /* MW 3 */
+ 13328 "10001001" // /* MW 2 */
+ 13329 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13331 "10100001" // /* MW 7 */
+ 13332 "01001000" // /* MW 6 */
+ 13333 "10001100" // /* MW 5 */
+ 13334 "11000110" // /* MW 4 */
+ 13335 "10001110" // /* MW 3 */
+ 13336 "10110000" // /* MW 2 */
+ 13337 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13339 "10100001" // /* MW 7 */
+ 13340 "00110110" // /* MW 6 */
+ 13341 "10001010" // /* MW 5 */
+ 13342 "01000110" // /* MW 4 */
+ 13343 "00001111" // /* MW 3 */
+ 13344 "10011100" // /* MW 2 */
+ 13345 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13347 "00001110" // /* MW 3 */
+ 13348 "10001001" // /* MW 2 */
+ 13349 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13351 "11100001" // /* MW 7 */
+ 13352 "10010010" // /* MW 6 */
+ 13353 "10001011" // /* MW 5 */
+ 13354 "01000110" // /* MW 4 */
+ 13355 "00000011" // /* MW 3 */
+ 13356 "00011100" // /* MW 2 */
+ 13357 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13359 "11100001" // /* MW 7 */
+ 13360 "01010110" // /* MW 6 */
+ 13361 "10001000" // /* MW 5 */
+ 13362 "01000110" // /* MW 4 */
+ 13363 "00000111" // /* MW 3 */
+ 13364 "00011100" // /* MW 2 */
+ 13365 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13367 "01101110" // /* MW 9 */
+ 13368 "01000001" // /* MW 8 */
+ 13369 "00011000" // /* MW 7 */
+ 13370 "00000001" // /* MW 6 */
+ 13371 "00010000" // /* MW 5 */
+ 13372 "00000000" // /* MW 4 */
+ 13373 "11110000" // /* MW 3 */
+ 13374 "00101100" // /* MW 2 */
+ 13375 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13377 "01101010" // /* MW 15 */
+ 13378 "01100011" // /* MW 14 */
+ 13379 "01111100" // /* MW 13 */
+ 13380 "10100101" // /* MW 12 */
+ 13381 "00000001" // /* MW 11 */
+ 13382 "00000000" // /* MW 10 */
+ 13383 "00000000" // /* MW 9 */
+ 13384 "00000000" // /* MW 8 */
+ 13385 "01011011" // /* MW 7 */
+ 13386 "00000001" // /* MW 6 */
+ 13387 "00100000" // /* MW 5 */
+ 13388 "00000000" // /* MW 4 */
+ 13389 "11110000" // /* MW 3 */
+ 13390 "00101100" // /* MW 2 */
+ 13391 "00000000" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13393 "00011010" // /* MW 15 */
+ 13394 "01001000" // /* MW 14 */
+ 13395 "01111100" // /* MW 13 */
+ 13396 "10100101" // /* MW 12 */
+ 13397 "00000001" // /* MW 11 */
+ 13398 "00000000" // /* MW 10 */
+ 13399 "00000000" // /* MW 9 */
+ 13400 "00000000" // /* MW 8 */
+ 13401 "01011011" // /* MW 7 */
+ 13402 "00000001" // /* MW 6 */
+ 13403 "00100000" // /* MW 5 */
+ 13404 "00000000" // /* MW 4 */
+ 13405 "11110000" // /* MW 3 */
+ 13406 "00101100" // /* MW 2 */
+ 13407 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13409 "01101110" // /* MW 9 */
+ 13410 "10000001" // /* MW 8 */
+ 13411 "10000100" // /* MW 7 */
+ 13412 "00000010" // /* MW 6 */
+ 13413 "10010000" // /* MW 5 */
+ 13414 "01110011" // /* MW 4 */
+ 13415 "11110100" // /* MW 3 */
+ 13416 "00001100" // /* MW 2 */
+ 13417 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13419 "00000001" // /* MW 7 */
+ 13420 "10001001" // /* MW 6 */
+ 13421 "10001010" // /* MW 5 */
+ 13422 "01000110" // /* MW 4 */
+ 13423 "00001011" // /* MW 3 */
+ 13424 "10011100" // /* MW 2 */
+ 13425 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13427 "00000001" // /* MW 7 */
+ 13428 "00110101" // /* MW 6 */
+ 13429 "10001001" // /* MW 5 */
+ 13430 "11000110" // /* MW 4 */
+ 13431 "10000110" // /* MW 3 */
+ 13432 "00110000" // /* MW 2 */
+ 13433 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13435 "00000110" // /* MW 3 */
+ 13436 "10001001" // /* MW 2 */
+ 13437 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13439 "10100001" // /* MW 7 */
+ 13440 "01001000" // /* MW 6 */
+ 13441 "10001100" // /* MW 5 */
+ 13442 "01000110" // /* MW 4 */
+ 13443 "00001111" // /* MW 3 */
+ 13444 "10011100" // /* MW 2 */
+ 13445 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13447 "10100001" // /* MW 9 */
+ 13448 "00110110" // /* MW 8 */
+ 13449 "10001010" // /* MW 7 */
+ 13450 "11000010" // /* MW 6 */
+ 13451 "10001110" // /* MW 5 */
+ 13452 "10110000" // /* MW 4 */
+ 13453 "11110100" // /* MW 3 */
+ 13454 "00101100" // /* MW 2 */
+ 13455 "00000000" // /* MW 1 */
+.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13457 "00011101" // /* MW 5 */
+ 13458 "00010010" // /* MW 4 */
+ 13459 "10001011" // /* MW 3 */
+ 13460 "00011110" // /* MW 2 */
+ 13461 "00111110" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13463 "11100001" // /* MW 9 */
+ 13464 "10010010" // /* MW 8 */
+ 13465 "10001011" // /* MW 7 */
+ 13466 "00000010" // /* MW 6 */
+ 13467 "01010100" // /* MW 5 */
+ 13468 "10110111" // /* MW 4 */
+ 13469 "00000001" // /* MW 3 */
+ 13470 "00000000" // /* MW 2 */
+ 13471 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 5 "accum.hpp" 946 89 first
+.src_ref 2 "conv2d_dw_bf16.h" 250 8
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13473 "11100001" // /* MW 11 */
+ 13474 "01010110" // /* MW 10 */
+ 13475 "10001000" // /* MW 9 */
+ 13476 "00000010" // /* MW 8 */
+ 13477 "01001111" // /* MW 7 */
+ 13478 "10001111" // /* MW 6 */
+ 13479 "00000001" // /* MW 5 */
+ 13480 "00000000" // /* MW 4 */
+ 13481 "01110000" // /* MW 3 */
+ 13482 "10000101" // /* MW 2 */
+ 13483 "10000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 250 8 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13485 "01111111" // /* MW 3 */
+ 13486 "01110010" // /* MW 2 */
+ 13487 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13489 "10011011" // /* MW 3 */
+ 13490 "00011101" // /* MW 2 */
+ 13491 "00000000" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13493 "01110100" // /* MW 3 */
+ 13494 "00011100" // /* MW 2 */
+ 13495 "00111000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13497 "10110100" // /* MW 3 */
+ 13498 "01011000" // /* MW 2 */
+ 13499 "00111000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13501 "10010110" // /* MW 3 */
+ 13502 "00010001" // /* MW 2 */
+ 13503 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13505 "00010110" // /* MW 3 */
+ 13506 "00010000" // /* MW 2 */
+ 13507 "00001011" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13509 "01101100" // /* MW 3 */
+ 13510 "01010000" // /* MW 2 */
+ 13511 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13513 "00010100" // /* MW 3 */
+ 13514 "01010011" // /* MW 2 */
+ 13515 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 4 "max_min.hpp" 20 104 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13517 "01110000" // /* MW 7 */
+ 13518 "00110110" // /* MW 6 */
+ 13519 "10101000" // /* MW 5 */
+ 13520 "00000010" // /* MW 4 */
+ 13521 "01100000" // /* MW 3 */
+ 13522 "01000010" // /* MW 2 */
+ 13523 "01010001" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13525 "00000011" // /* MW 3 */
+ 13526 "00011100" // /* MW 2 */
+ 13527 "00011101" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13529 "01110000" // /* MW 7 */
+ 13530 "01000101" // /* MW 6 */
+ 13531 "10000000" // /* MW 5 */
+ 13532 "00000001" // /* MW 4 */
+ 13533 "01100000" // /* MW 3 */
+ 13534 "01010010" // /* MW 2 */
+ 13535 "01000111" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13537 "01000001" // /* MW 7 */
+ 13538 "01101101" // /* MW 6 */
+ 13539 "10001100" // /* MW 5 */
+ 13540 "01000110" // /* MW 4 */
+ 13541 "00000111" // /* MW 3 */
+ 13542 "00011100" // /* MW 2 */
+ 13543 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13545 "01000001" // /* MW 7 */
+ 13546 "00000011" // /* MW 6 */
+ 13547 "10001001" // /* MW 5 */
+ 13548 "11000110" // /* MW 4 */
+ 13549 "10000010" // /* MW 3 */
+ 13550 "00110000" // /* MW 2 */
+ 13551 "00000010" // /* MW 1 */
+.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18
+.begin_of_loop
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+.loop_nesting 2
+ 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13553 "01101110" // /* MW 9 */
+ 13554 "10000001" // /* MW 8 */
+ 13555 "10000100" // /* MW 7 */
+ 13556 "00000010" // /* MW 6 */
+ 13557 "11110100" // /* MW 5 */
+ 13558 "11110000" // /* MW 4 */
+ 13559 "01110001" // /* MW 3 */
+ 13560 "10110011" // /* MW 2 */
+ 13561 "00000011" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1139 17
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13563 "00000001" // /* MW 9 */
+ 13564 "10001001" // /* MW 8 */
+ 13565 "10001010" // /* MW 7 */
+ 13566 "01000110" // /* MW 6 */
+ 13567 "00001011" // /* MW 5 */
+ 13568 "10011100" // /* MW 4 */
+ 13569 "11101010" // /* MW 3 */
+ 13570 "00111000" // /* MW 2 */
+ 13571 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1139 17 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13573 "00000001" // /* MW 9 */
+ 13574 "00110101" // /* MW 8 */
+ 13575 "10001001" // /* MW 7 */
+ 13576 "11000110" // /* MW 6 */
+ 13577 "10000110" // /* MW 5 */
+ 13578 "00110000" // /* MW 4 */
+ 13579 "01101010" // /* MW 3 */
+ 13580 "10110001" // /* MW 2 */
+ 13581 "00000000" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13583 "00000110" // /* MW 3 */
+ 13584 "10001001" // /* MW 2 */
+ 13585 "00011101" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13587 "10100001" // /* MW 7 */
+ 13588 "01001000" // /* MW 6 */
+ 13589 "10001100" // /* MW 5 */
+ 13590 "11000110" // /* MW 4 */
+ 13591 "10001110" // /* MW 3 */
+ 13592 "10110000" // /* MW 2 */
+ 13593 "00000100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.aggressive_scheduled_block_id 2
+.nohwbrkpt
+.noswbrkpt
+ 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13595 "10100001" // /* MW 7 */
+ 13596 "00110110" // /* MW 6 */
+ 13597 "10001010" // /* MW 5 */
+ 13598 "01000110" // /* MW 4 */
+ 13599 "00001111" // /* MW 3 */
+ 13600 "10011100" // /* MW 2 */
+ 13601 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13603 "00001110" // /* MW 3 */
+ 13604 "10001001" // /* MW 2 */
+ 13605 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 268 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13607 "11100001" // /* MW 7 */
+ 13608 "10010010" // /* MW 6 */
+ 13609 "10001011" // /* MW 5 */
+ 13610 "01000110" // /* MW 4 */
+ 13611 "00000011" // /* MW 3 */
+ 13612 "00011100" // /* MW 2 */
+ 13613 "00000101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 265 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13615 "11100001" // /* MW 7 */
+ 13616 "01010110" // /* MW 6 */
+ 13617 "10001000" // /* MW 5 */
+ 13618 "01000110" // /* MW 4 */
+ 13619 "00000111" // /* MW 3 */
+ 13620 "00011100" // /* MW 2 */
+ 13621 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+ 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13623 "00000101" // /* MW 5 */
+ 13624 "01100001" // /* MW 4 */
+ 13625 "11110100" // /* MW 3 */
+ 13626 "00101100" // /* MW 2 */
+ 13627 "00000000" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 270 12 first
+ 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13629 "01000001" // /* MW 3 */
+ 13630 "01101101" // /* MW 2 */
+ 13631 "10001100" // /* MW 1 */
+.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608
+.src_ref 2 "conv2d_dw_bf16.h" 274 12 first
+.end_of_loop
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13633 "00011010" // /* MW 15 */
+ 13634 "01001000" // /* MW 14 */
+ 13635 "01111100" // /* MW 13 */
+ 13636 "10100101" // /* MW 12 */
+ 13637 "00000001" // /* MW 11 */
+ 13638 "00000000" // /* MW 10 */
+ 13639 "00000000" // /* MW 9 */
+ 13640 "00000000" // /* MW 8 */
+ 13641 "01011011" // /* MW 7 */
+ 13642 "00000001" // /* MW 6 */
+ 13643 "00100000" // /* MW 5 */
+ 13644 "00000000" // /* MW 4 */
+ 13645 "11110000" // /* MW 3 */
+ 13646 "00101100" // /* MW 2 */
+ 13647 "00000000" // /* MW 1 */
+.src_ref 6 "aie_core.h" 81 15 first
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 244 4 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 1
+ 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */
+ 13649 "01101000" // /* MW 11 */
+ 13650 "10000001" // /* MW 10 */
+ 13651 "10000100" // /* MW 9 */
+ 13652 "00000010" // /* MW 8 */
+ 13653 "00100111" // /* MW 7 */
+ 13654 "00000100" // /* MW 6 */
+ 13655 "00100000" // /* MW 5 */
+ 13656 "11100111" // /* MW 4 */
+ 13657 "11111000" // /* MW 3 */
+ 13658 "00001100" // /* MW 2 */
+ 13659 "00000011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 266 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 271 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13661 "00000001" // /* MW 7 */
+ 13662 "10001001" // /* MW 6 */
+ 13663 "10001010" // /* MW 5 */
+ 13664 "01000110" // /* MW 4 */
+ 13665 "00001011" // /* MW 3 */
+ 13666 "10011100" // /* MW 2 */
+ 13667 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 275 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13669 "00000001" // /* MW 7 */
+ 13670 "00110101" // /* MW 6 */
+ 13671 "10001001" // /* MW 5 */
+ 13672 "11000110" // /* MW 4 */
+ 13673 "10000110" // /* MW 3 */
+ 13674 "00110000" // /* MW 2 */
+ 13675 "00000010" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13677 "00000110" // /* MW 3 */
+ 13678 "10001001" // /* MW 2 */
+ 13679 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 267 12 first
+.src_ref 2 "conv2d_dw_bf16.h" 272 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13681 "10100001" // /* MW 7 */
+ 13682 "01001000" // /* MW 6 */
+ 13683 "10001100" // /* MW 5 */
+ 13684 "01000110" // /* MW 4 */
+ 13685 "00001111" // /* MW 3 */
+ 13686 "10011100" // /* MW 2 */
+ 13687 "00000011" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18 first
+.src_ref 2 "conv2d_dw_bf16.h" 276 12 first
+.delay_slot
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13689 "10100001" // /* MW 7 */
+ 13690 "00110110" // /* MW 6 */
+ 13691 "10001010" // /* MW 5 */
+ 13692 "11000110" // /* MW 4 */
+ 13693 "10001110" // /* MW 3 */
+ 13694 "10110000" // /* MW 2 */
+ 13695 "00000100" // /* MW 1 */
+.src_ref 4 "shuffle.hpp" 142 18
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+.loop_nesting 0
+ 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13697 "00001110" // /* MW 3 */
+ 13698 "10001001" // /* MW 2 */
+ 13699 "00011101" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 273 12 first
+ 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13701 "11100001" // /* MW 3 */
+ 13702 "10010010" // /* MW 2 */
+ 13703 "10001011" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 277 12 first
+ 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13705 "11100001" // /* MW 3 */
+ 13706 "01010110" // /* MW 2 */
+ 13707 "10001000" // /* MW 1 */
+ 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13709 "00000000" // /* MW 1 */
+ 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13711 "00000000" // /* MW 1 */
+ 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13713 "00000000" // /* MW 1 */
+ 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13715 "00000000" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102 first
+ 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13717 "10010110" // /* MW 3 */
+ 13718 "00010001" // /* MW 2 */
+ 13719 "00001101" // /* MW 1 */
+.src_ref 5 "accum.hpp" 1110 102
+.src_ref 2 "conv2d_dw_bf16.h" 290 first
+ 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */
+ 13721 "00000000" // /* MW 5 */
+ 13722 "01010000" // /* MW 4 */
+ 13723 "11000000" // /* MW 3 */
+ 13724 "00000010" // /* MW 2 */
+ 13725 "01100010" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13727 "01101100" // /* MW 3 */
+ 13728 "01010000" // /* MW 2 */
+ 13729 "00011100" // /* MW 1 */
+.src_ref 2 "conv2d_dw_bf16.h" 286 17 first
+.delay_slot
+ 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13731 "00010100" // /* MW 3 */
+ 13732 "01010011" // /* MW 2 */
+ 13733 "00011101" // /* MW 1 */
+.src_ref 4 "max_min.hpp" 20 104 first
+.delay_slot
+ 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13735 "01101100" // /* MW 3 */
+ 13736 "01010000" // /* MW 2 */
+ 13737 "00011101" // /* MW 1 */
+.src_ref 4 "vector.hpp" 1159 33 first
+.src_ref 2 "conv2d_dw_bf16.h" 285 16 first
+.delay_slot
+ 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13739 "00010011" // /* MW 3 */
+ 13740 "10001010" // /* MW 2 */
+ 13741 "00001010" // /* MW 1 */
+.src_ref 6 "aie_core.h" 100 15 first
+.src_ref 4 "vector.hpp" 1159 33
+.delay_slot
+ 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13743 "10010011" // /* MW 3 */
+ 13744 "00111010" // /* MW 2 */
+.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end
+.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0
+ 13745 "00001010" // /* MW 1 */
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE
+.src_ref 7 "superkernels.cpp" 444 first
+.src_ref 7 "superkernels.cpp" 449 6
+.function_start
+ 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13761 "10000000" // /* MW 5 */
+ 13762 "11001000" // /* MW 4 */
+ 13763 "11001000" // /* MW 3 */
+ 13764 "00000111" // /* MW 2 */
+ 13765 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6 first
+ 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13767 "01000001" // /* MW 5 */
+ 13768 "00101111" // /* MW 4 */
+ 13769 "11010000" // /* MW 3 */
+ 13770 "11000010" // /* MW 2 */
+ 13771 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 444
+ 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13773 "00000001" // /* MW 5 */
+ 13774 "00000000" // /* MW 4 */
+ 13775 "00000000" // /* MW 3 */
+ 13776 "00010000" // /* MW 2 */
+ 13777 "00000000" // /* MW 1 */
+ 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13779 "01110000" // /* MW 7 */
+ 13780 "01110000" // /* MW 6 */
+ 13781 "00101101" // /* MW 5 */
+ 13782 "00000010" // /* MW 4 */
+ 13783 "10110000" // /* MW 3 */
+ 13784 "00111010" // /* MW 2 */
+ 13785 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+ 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13787 "01110000" // /* MW 7 */
+ 13788 "11110000" // /* MW 6 */
+ 13789 "10101000" // /* MW 5 */
+ 13790 "00000001" // /* MW 4 */
+ 13791 "10110000" // /* MW 3 */
+ 13792 "10110110" // /* MW 2 */
+ 13793 "11111111" // /* MW 1 */
+ 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13795 "00011101" // /* MW 3 */
+ 13796 "11101100" // /* MW 2 */
+ 13797 "00001111" // /* MW 1 */
+ 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13799 "10011101" // /* MW 3 */
+ 13800 "11110111" // /* MW 2 */
+ 13801 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+ 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13803 "01110000" // /* MW 7 */
+ 13804 "01100000" // /* MW 6 */
+ 13805 "11001010" // /* MW 5 */
+ 13806 "00000001" // /* MW 4 */
+ 13807 "10110000" // /* MW 3 */
+ 13808 "00000010" // /* MW 2 */
+ 13809 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 449 6
+.src_ref 7 "superkernels.cpp" 449 16
+ 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */
+ 13811 "00000001" // /* MW 5 */
+ 13812 "01000000" // /* MW 4 */
+ 13813 "00111000" // /* MW 3 */
+ 13814 "00011011" // /* MW 2 */
+ 13815 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+.delay_slot
+ 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13817 "11000000" // /* MW 3 */
+ 13818 "11010110" // /* MW 2 */
+ 13819 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 22 first
+.delay_slot
+ 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13821 "10010000" // /* MW 3 */
+ 13822 "01100010" // /* MW 2 */
+ 13823 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 30
+.delay_slot
+ 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13825 "11111011" // /* MW 3 */
+ 13826 "01100011" // /* MW 2 */
+ 13827 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13829 "10100000" // /* MW 5 */
+ 13830 "11001000" // /* MW 4 */
+ 13831 "11000110" // /* MW 3 */
+ 13832 "00000111" // /* MW 2 */
+ 13833 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 446 11
+.delay_slot
+ 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13835 "00110001" // /* MW 3 */
+ 13836 "00000110" // /* MW 2 */
+ 13837 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13839 "00010001" // /* MW 9 */
+ 13840 "00110100" // /* MW 8 */
+ 13841 "10110010" // /* MW 7 */
+ 13842 "11110000" // /* MW 6 */
+ 13843 "00000001" // /* MW 5 */
+ 13844 "00000000" // /* MW 4 */
+ 13845 "01100000" // /* MW 3 */
+ 13846 "10010001" // /* MW 2 */
+ 13847 "11110000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 13849 "00010000" // /* MW 11 */
+ 13850 "00110010" // /* MW 10 */
+ 13851 "10110010" // /* MW 9 */
+ 13852 "11110000" // /* MW 8 */
+ 13853 "00000001" // /* MW 7 */
+ 13854 "00000000" // /* MW 6 */
+ 13855 "10001011" // /* MW 5 */
+ 13856 "10001000" // /* MW 4 */
+ 13857 "11100000" // /* MW 3 */
+ 13858 "11000000" // /* MW 2 */
+ 13859 "00100000" // /* MW 1 */
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13861 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 451 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */
+ 13863 "00000001" // /* MW 5 */
+ 13864 "00000000" // /* MW 4 */
+ 13865 "00100000" // /* MW 3 */
+ 13866 "00011000" // /* MW 2 */
+ 13867 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13869 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13871 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13873 "00110001" // /* MW 3 */
+ 13874 "00100000" // /* MW 2 */
+ 13875 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13877 "00000101" // /* MW 3 */
+ 13878 "00100000" // /* MW 2 */
+ 13879 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13881 "01110000" // /* MW 7 */
+ 13882 "10100101" // /* MW 6 */
+ 13883 "00000001" // /* MW 5 */
+ 13884 "00000000" // /* MW 4 */
+ 13885 "00110000" // /* MW 3 */
+ 13886 "11000010" // /* MW 2 */
+ 13887 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 44
+.src_ref 7 "superkernels.cpp" 461 2
+.return_address
+ 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 13889 "00000000" // /* MW 7 */
+ 13890 "10000010" // /* MW 6 */
+ 13891 "00110011" // /* MW 5 */
+ 13892 "00000001" // /* MW 4 */
+ 13893 "01100000" // /* MW 3 */
+ 13894 "10010001" // /* MW 2 */
+ 13895 "00110011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 17 first
+ 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13897 "00111010" // /* MW 3 */
+ 13898 "00000110" // /* MW 2 */
+ 13899 "00000010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13
+.src_ref 7 "superkernels.cpp" 453 15 first
+ 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13901 "00010000" // /* MW 9 */
+ 13902 "00110000" // /* MW 8 */
+ 13903 "00110010" // /* MW 7 */
+ 13904 "11110001" // /* MW 6 */
+ 13905 "00000001" // /* MW 5 */
+ 13906 "00000000" // /* MW 4 */
+ 13907 "01010000" // /* MW 3 */
+ 13908 "11000011" // /* MW 2 */
+ 13909 "01000100" // /* MW 1 */
+ 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13911 "00000000" // /* MW 1 */
+ 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */
+ 13913 "00000000" // /* MW 5 */
+ 13914 "00000000" // /* MW 4 */
+ 13915 "01000000" // /* MW 3 */
+ 13916 "00011011" // /* MW 2 */
+ 13917 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15
+.src_ref 7 "superkernels.cpp" 457 26
+.delay_slot
+ 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 13919 "10110000" // /* MW 5 */
+ 13920 "11001000" // /* MW 4 */
+ 13921 "11000110" // /* MW 3 */
+ 13922 "00000111" // /* MW 2 */
+ 13923 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13925 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13927 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 452 15 first
+.delay_slot
+ 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13929 "00110001" // /* MW 3 */
+ 13930 "00000110" // /* MW 2 */
+ 13931 "00001011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 453 13 first
+.delay_slot
+ 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13933 "00010001" // /* MW 3 */
+ 13934 "00000110" // /* MW 2 */
+ 13935 "00001010" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176
+.src_ref 7 "superkernels.cpp" 457 26
+ 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 13937 "00000000" // /* MW 15 */
+ 13938 "00000000" // /* MW 14 */
+ 13939 "00010000" // /* MW 13 */
+ 13940 "00101100" // /* MW 12 */
+ 13941 "10110010" // /* MW 11 */
+ 13942 "11110001" // /* MW 10 */
+ 13943 "00000001" // /* MW 9 */
+ 13944 "00000000" // /* MW 8 */
+ 13945 "01011011" // /* MW 7 */
+ 13946 "00000001" // /* MW 6 */
+ 13947 "00100000" // /* MW 5 */
+ 13948 "00000000" // /* MW 4 */
+ 13949 "11110000" // /* MW 3 */
+ 13950 "00101100" // /* MW 2 */
+ 13951 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13953 "10000110" // /* MW 3 */
+ 13954 "01100111" // /* MW 2 */
+ 13955 "00011010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15
+.src_ref 1 "io_buffer_main.h" 218 49
+ 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 13957 "00010000" // /* MW 9 */
+ 13958 "00101000" // /* MW 8 */
+ 13959 "00110010" // /* MW 7 */
+ 13960 "11110010" // /* MW 6 */
+ 13961 "00000001" // /* MW 5 */
+ 13962 "00000000" // /* MW 4 */
+ 13963 "11010000" // /* MW 3 */
+ 13964 "11101110" // /* MW 2 */
+ 13965 "01011111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13967 "00010110" // /* MW 3 */
+ 13968 "11111110" // /* MW 2 */
+ 13969 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13971 "00110110" // /* MW 3 */
+ 13972 "11111110" // /* MW 2 */
+ 13973 "00000010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+ 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13975 "01010110" // /* MW 3 */
+ 13976 "01000110" // /* MW 2 */
+ 13977 "00000010" // /* MW 1 */
+ 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13979 "00000000" // /* MW 1 */
+ 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13981 "00000000" // /* MW 1 */
+ 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13983 "00000000" // /* MW 1 */
+ 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13985 "00000000" // /* MW 1 */
+ 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 13987 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13989 "00000010" // /* MW 3 */
+ 13990 "01100001" // /* MW 2 */
+ 13991 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+ 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13993 "00010001" // /* MW 3 */
+ 13994 "00000110" // /* MW 2 */
+ 13995 "00001010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8
+ 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 13997 "11111101" // /* MW 3 */
+ 13998 "11100000" // /* MW 2 */
+ 13999 "00010111" // /* MW 1 */
+ 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14001 "00000000" // /* MW 1 */
+ 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14003 "00000000" // /* MW 1 */
+ 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14005 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14007 "00001000" // /* MW 3 */
+ 14008 "10010011" // /* MW 2 */
+ 14009 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11
+.src_ref 7 "superkernels.cpp" 459 47
+.src_ref 7 "superkernels.cpp" 464 6
+.src_ref 7 "superkernels.cpp" 465 16
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+ 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14011 "00010000" // /* MW 9 */
+ 14012 "00100000" // /* MW 8 */
+ 14013 "10110010" // /* MW 7 */
+ 14014 "11110011" // /* MW 6 */
+ 14015 "00000001" // /* MW 5 */
+ 14016 "00000000" // /* MW 4 */
+ 14017 "00000000" // /* MW 3 */
+ 14018 "00101111" // /* MW 2 */
+ 14019 "00000000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+ 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14021 "11000001" // /* MW 5 */
+ 14022 "00101011" // /* MW 4 */
+ 14023 "00101000" // /* MW 3 */
+ 14024 "00000000" // /* MW 2 */
+ 14025 "00000110" // /* MW 1 */
+ 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14027 "01011010" // /* MW 3 */
+ 14028 "01101000" // /* MW 2 */
+ 14029 "00011000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 51
+ 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14031 "10000001" // /* MW 5 */
+ 14032 "00101001" // /* MW 4 */
+ 14033 "00100111" // /* MW 3 */
+ 14034 "11010011" // /* MW 2 */
+ 14035 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 15 first
+ 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14037 "00110110" // /* MW 3 */
+ 14038 "00000110" // /* MW 2 */
+ 14039 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 26
+.src_ref 7 "superkernels.cpp" 461 2
+ 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14041 "00010000" // /* MW 9 */
+ 14042 "11100000" // /* MW 8 */
+ 14043 "10110011" // /* MW 7 */
+ 14044 "11110001" // /* MW 6 */
+ 14045 "00000001" // /* MW 5 */
+ 14046 "00000000" // /* MW 4 */
+ 14047 "11010000" // /* MW 3 */
+ 14048 "11000010" // /* MW 2 */
+ 14049 "01100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14051 "01010110" // /* MW 3 */
+ 14052 "00000110" // /* MW 2 */
+ 14053 "00000111" // /* MW 1 */
+ 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14055 "00000000" // /* MW 1 */
+ 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14057 "00000000" // /* MW 1 */
+ 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14059 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+ 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14061 "01110110" // /* MW 3 */
+ 14062 "00000110" // /* MW 2 */
+ 14063 "00000101" // /* MW 1 */
+ 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14065 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 457 24 first
+ 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14067 "00001111" // /* MW 3 */
+ 14068 "01100001" // /* MW 2 */
+ 14069 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+ 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14071 "00000111" // /* MW 3 */
+ 14072 "10100010" // /* MW 2 */
+ 14073 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+ 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14075 "11111101" // /* MW 3 */
+ 14076 "00100000" // /* MW 2 */
+ 14077 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 461 2 first
+.no_stack_arguments
+ 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */
+ 14079 "00000001" // /* MW 5 */
+ 14080 "00000000" // /* MW 4 */
+ 14081 "01110000" // /* MW 3 */
+ 14082 "00011001" // /* MW 2 */
+ 14083 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 456 11 first
+.delay_slot
+ 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14085 "00110001" // /* MW 3 */
+ 14086 "00000110" // /* MW 2 */
+ 14087 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 459 47 first
+.delay_slot
+ 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14089 "11000001" // /* MW 3 */
+ 14090 "01001001" // /* MW 2 */
+ 14091 "00011000" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 201 10 first
+.delay_slot
+ 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14093 "00100101" // /* MW 3 */
+ 14094 "10110100" // /* MW 2 */
+ 14095 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16 first
+.delay_slot
+ 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14097 "00010101" // /* MW 3 */
+ 14098 "10111011" // /* MW 2 */
+ 14099 "00001111" // /* MW 1 */
+.src_ref 9 "io_buffer_impl.h" 52 16
+.delay_slot
+ 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14101 "11000001" // /* MW 11 */
+ 14102 "10001010" // /* MW 10 */
+ 14103 "11011111" // /* MW 9 */
+ 14104 "00000011" // /* MW 8 */
+ 14105 "00000000" // /* MW 7 */
+ 14106 "00000000" // /* MW 6 */
+ 14107 "00100000" // /* MW 5 */
+ 14108 "00000000" // /* MW 4 */
+ 14109 "11110000" // /* MW 3 */
+ 14110 "00101100" // /* MW 2 */
+ 14111 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.return_address
+ 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14113 "00001010" // /* MW 3 */
+ 14114 "01100111" // /* MW 2 */
+ 14115 "00011010" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14117 "00010110" // /* MW 3 */
+ 14118 "00000110" // /* MW 2 */
+ 14119 "00000010" // /* MW 1 */
+ 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14121 "00000000" // /* MW 1 */
+ 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14123 "00000000" // /* MW 1 */
+ 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14125 "00000000" // /* MW 1 */
+ 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14127 "00000000" // /* MW 1 */
+ 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14129 "00000000" // /* MW 1 */
+ 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14131 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14133 "11111000" // /* MW 3 */
+ 14134 "00010000" // /* MW 2 */
+ 14135 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14137 "00010000" // /* MW 9 */
+ 14138 "00110000" // /* MW 8 */
+ 14139 "10110010" // /* MW 7 */
+ 14140 "11110000" // /* MW 6 */
+ 14141 "00000001" // /* MW 5 */
+ 14142 "00000000" // /* MW 4 */
+ 14143 "11010000" // /* MW 3 */
+ 14144 "11000010" // /* MW 2 */
+ 14145 "01011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 19 first
+ 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14147 "01010110" // /* MW 3 */
+ 14148 "00000110" // /* MW 2 */
+ 14149 "00000001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14151 "00110110" // /* MW 3 */
+ 14152 "00000110" // /* MW 2 */
+ 14153 "00000111" // /* MW 1 */
+ 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14155 "10011001" // /* MW 3 */
+ 14156 "11110100" // /* MW 2 */
+ 14157 "00000111" // /* MW 1 */
+ 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14159 "11010001" // /* MW 3 */
+ 14160 "11111001" // /* MW 2 */
+ 14161 "00000111" // /* MW 1 */
+ 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14163 "00000000" // /* MW 1 */
+ 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14165 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14167 "00000001" // /* MW 3 */
+ 14168 "11100001" // /* MW 2 */
+ 14169 "00010011" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14171 "00010001" // /* MW 3 */
+ 14172 "11100110" // /* MW 2 */
+ 14173 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 16 first
+ 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14175 "00101000" // /* MW 3 */
+ 14176 "01100001" // /* MW 2 */
+ 14177 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 464 6
+ 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */
+ 14179 "00000001" // /* MW 5 */
+ 14180 "01000000" // /* MW 4 */
+ 14181 "11000000" // /* MW 3 */
+ 14182 "00011011" // /* MW 2 */
+ 14183 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16
+.delay_slot
+ 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14185 "00000001" // /* MW 3 */
+ 14186 "00110000" // /* MW 2 */
+ 14187 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14189 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14191 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14193 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14195 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 465 16 first
+ 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14197 "11000001" // /* MW 11 */
+ 14198 "10001000" // /* MW 10 */
+ 14199 "10000011" // /* MW 9 */
+ 14200 "00000011" // /* MW 8 */
+ 14201 "00000000" // /* MW 7 */
+ 14202 "00000000" // /* MW 6 */
+ 14203 "00100000" // /* MW 5 */
+ 14204 "00000000" // /* MW 4 */
+ 14205 "11110000" // /* MW 3 */
+ 14206 "00101100" // /* MW 2 */
+ 14207 "00000000" // /* MW 1 */
+.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448
+.src_ref 7 "superkernels.cpp" 467
+ 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14209 "01000001" // /* MW 5 */
+ 14210 "11101101" // /* MW 4 */
+ 14211 "00101110" // /* MW 3 */
+ 14212 "10110110" // /* MW 2 */
+ 14213 "11111111" // /* MW 1 */
+ 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14215 "11110001" // /* MW 3 */
+ 14216 "11110001" // /* MW 2 */
+ 14217 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467 first
+ 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 14219 "00000000" // /* MW 3 */
+ 14220 "00101000" // /* MW 2 */
+ 14221 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 467
+.delay_slot
+ 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14223 "00000001" // /* MW 5 */
+ 14224 "00000000" // /* MW 4 */
+ 14225 "00000000" // /* MW 3 */
+ 14226 "11110000" // /* MW 2 */
+ 14227 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14229 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14231 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14233 "00000000" // /* MW 1 */
+.delay_slot
+ 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14235 "11000000" // /* MW 3 */
+ 14236 "01100010" // /* MW 2 */
+.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end
+.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0
+ 14237 "00011111" // /* MW 1 */
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE
+.src_ref 7 "superkernels.cpp" 578
+.src_ref 7 "superkernels.cpp" 578 first
+.function_start
+ 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14241 "00000001" // /* MW 5 */
+ 14242 "00000000" // /* MW 4 */
+ 14243 "00000000" // /* MW 3 */
+ 14244 "00001000" // /* MW 2 */
+ 14245 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+ 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14247 "00010001" // /* MW 9 */
+ 14248 "00100000" // /* MW 8 */
+ 14249 "10110010" // /* MW 7 */
+ 14250 "11110011" // /* MW 6 */
+ 14251 "00000001" // /* MW 5 */
+ 14252 "00000000" // /* MW 4 */
+ 14253 "10110000" // /* MW 3 */
+ 14254 "01110011" // /* MW 2 */
+ 14255 "11111111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6 first
+ 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14257 "01110010" // /* MW 9 */
+ 14258 "01110000" // /* MW 8 */
+ 14259 "00101101" // /* MW 7 */
+ 14260 "10000010" // /* MW 6 */
+ 14261 "00011101" // /* MW 5 */
+ 14262 "11111111" // /* MW 4 */
+ 14263 "11010111" // /* MW 3 */
+ 14264 "11000010" // /* MW 2 */
+ 14265 "11100000" // /* MW 1 */
+ 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14267 "00011101" // /* MW 3 */
+ 14268 "11110110" // /* MW 2 */
+ 14269 "00001111" // /* MW 1 */
+ 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14271 "00011101" // /* MW 3 */
+ 14272 "11110001" // /* MW 2 */
+ 14273 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 599 105
+.src_ref 7 "superkernels.cpp" 629 34
+ 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 14275 "01110000" // /* MW 7 */
+ 14276 "01100000" // /* MW 6 */
+ 14277 "10110011" // /* MW 5 */
+ 14278 "00000011" // /* MW 4 */
+ 14279 "10110000" // /* MW 3 */
+ 14280 "10000111" // /* MW 2 */
+ 14281 "11111101" // /* MW 1 */
+ 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14283 "00000000" // /* MW 1 */
+ 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14285 "00000000" // /* MW 1 */
+ 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14287 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 583 6
+.src_ref 7 "superkernels.cpp" 583 16
+ 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */
+ 14289 "00000001" // /* MW 5 */
+ 14290 "01000000" // /* MW 4 */
+ 14291 "10110000" // /* MW 3 */
+ 14292 "00011100" // /* MW 2 */
+ 14293 "10000000" // /* MW 1 */
+.delay_slot
+ 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14295 "00011101" // /* MW 3 */
+ 14296 "11101000" // /* MW 2 */
+ 14297 "00001111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 22 first
+.delay_slot
+ 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14299 "10010000" // /* MW 3 */
+ 14300 "01100010" // /* MW 2 */
+ 14301 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 30
+.delay_slot
+ 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14303 "11111011" // /* MW 3 */
+ 14304 "01100011" // /* MW 2 */
+ 14305 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14307 "10100000" // /* MW 5 */
+ 14308 "11001000" // /* MW 4 */
+ 14309 "11001100" // /* MW 3 */
+ 14310 "00000111" // /* MW 2 */
+ 14311 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 580 11
+.delay_slot
+ 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14313 "00110001" // /* MW 3 */
+ 14314 "00000110" // /* MW 2 */
+ 14315 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 86 8
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id first
+ 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14317 "00010000" // /* MW 9 */
+ 14318 "00110100" // /* MW 8 */
+ 14319 "00110010" // /* MW 7 */
+ 14320 "11110011" // /* MW 6 */
+ 14321 "00000001" // /* MW 5 */
+ 14322 "00000000" // /* MW 4 */
+ 14323 "00000000" // /* MW 3 */
+ 14324 "00100000" // /* MW 2 */
+ 14325 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 4 "tile.hpp" 74 8
+.src_ref 4 "tile.hpp" 86 8 first
+.aggressive_scheduled_block_id 1
+.noswbrkpt
+ 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14327 "00010000" // /* MW 11 */
+ 14328 "00110010" // /* MW 10 */
+ 14329 "00110010" // /* MW 9 */
+ 14330 "11110000" // /* MW 8 */
+ 14331 "00000001" // /* MW 7 */
+ 14332 "00000000" // /* MW 6 */
+ 14333 "10001011" // /* MW 5 */
+ 14334 "10000100" // /* MW 4 */
+ 14335 "11100110" // /* MW 3 */
+ 14336 "11000000" // /* MW 2 */
+ 14337 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 7 "superkernels.cpp" 587 4
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14339 "00010000" // /* MW 9 */
+ 14340 "00000000" // /* MW 8 */
+ 14341 "10110011" // /* MW 7 */
+ 14342 "11110000" // /* MW 6 */
+ 14343 "00000001" // /* MW 5 */
+ 14344 "00000000" // /* MW 4 */
+ 14345 "00000000" // /* MW 3 */
+ 14346 "00000001" // /* MW 2 */
+ 14347 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4 first
+.aggressive_scheduled_block_id 1
+.no_stack_arguments
+.nohwbrkpt
+.noswbrkpt
+ 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */
+ 14349 "00000001" // /* MW 5 */
+ 14350 "00000000" // /* MW 4 */
+ 14351 "01100000" // /* MW 3 */
+ 14352 "00000101" // /* MW 2 */
+ 14353 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14355 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+.aggressive_scheduled_block_id 1
+.nohwbrkpt
+.noswbrkpt
+ 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14357 "00000000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 86 8
+.delay_slot
+.aggressive_scheduled_block_id 1
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14359 "00110001" // /* MW 3 */
+ 14360 "00100000" // /* MW 2 */
+ 14361 "00010000" // /* MW 1 */
+.src_ref 4 "tile.hpp" 74 8
+.delay_slot
+ 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14363 "00001010" // /* MW 5 */
+ 14364 "01000000" // /* MW 4 */
+ 14365 "11110000" // /* MW 3 */
+ 14366 "00101100" // /* MW 2 */
+ 14367 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 587 4
+.src_ref 4 "tile.hpp" 74 8 first
+.delay_slot
+ 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14369 "00000000" // /* MW 15 */
+ 14370 "00000000" // /* MW 14 */
+ 14371 "01111000" // /* MW 13 */
+ 14372 "01100000" // /* MW 12 */
+ 14373 "00110111" // /* MW 11 */
+ 14374 "00000000" // /* MW 10 */
+ 14375 "00000000" // /* MW 9 */
+ 14376 "10000000" // /* MW 8 */
+ 14377 "00010001" // /* MW 7 */
+ 14378 "00000110" // /* MW 6 */
+ 14379 "00100000" // /* MW 5 */
+ 14380 "00000000" // /* MW 4 */
+ 14381 "11110000" // /* MW 3 */
+ 14382 "00101100" // /* MW 2 */
+ 14383 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35
+.src_ref 7 "superkernels.cpp" 591 4
+.return_address
+ 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14385 "00000001" // /* MW 5 */
+ 14386 "00000001" // /* MW 4 */
+ 14387 "10100001" // /* MW 3 */
+ 14388 "00000000" // /* MW 2 */
+ 14389 "00000100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 590 35 first
+ 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14391 "01010110" // /* MW 3 */
+ 14392 "00000010" // /* MW 2 */
+ 14393 "00000111" // /* MW 1 */
+ 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14395 "00000000" // /* MW 1 */
+ 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14397 "00000000" // /* MW 1 */
+ 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14399 "00000000" // /* MW 1 */
+ 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14401 "00000000" // /* MW 1 */
+ 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14403 "00000000" // /* MW 1 */
+ 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14405 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14407 "00000111" // /* MW 3 */
+ 14408 "10100001" // /* MW 2 */
+ 14409 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4
+ 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */
+ 14411 "00000001" // /* MW 5 */
+ 14412 "01000000" // /* MW 4 */
+ 14413 "01101000" // /* MW 3 */
+ 14414 "00011100" // /* MW 2 */
+ 14415 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105
+.delay_slot
+ 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14417 "11000000" // /* MW 3 */
+ 14418 "01011110" // /* MW 2 */
+ 14419 "00011100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 105 first
+.delay_slot
+ 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14421 "10010000" // /* MW 3 */
+ 14422 "11001000" // /* MW 2 */
+ 14423 "00011000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14425 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14427 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14429 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 591 4 first
+ 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */
+ 14431 "00000001" // /* MW 5 */
+ 14432 "01000000" // /* MW 4 */
+ 14433 "01011000" // /* MW 3 */
+ 14434 "00011100" // /* MW 2 */
+ 14435 "10010000" // /* MW 1 */
+.delay_slot
+ 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14437 "00000000" // /* MW 5 */
+ 14438 "00101100" // /* MW 4 */
+ 14439 "11001000" // /* MW 3 */
+ 14440 "00000111" // /* MW 2 */
+ 14441 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27
+.delay_slot
+ 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14443 "00000001" // /* MW 3 */
+ 14444 "00100010" // /* MW 2 */
+ 14445 "00010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14451 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8 first
+.no_stack_arguments
+ 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */
+ 14453 "01000001" // /* MW 9 */
+ 14454 "00000000" // /* MW 8 */
+ 14455 "00000000" // /* MW 7 */
+ 14456 "01110000" // /* MW 6 */
+ 14457 "00000101" // /* MW 5 */
+ 14458 "00000000" // /* MW 4 */
+ 14459 "10110000" // /* MW 3 */
+ 14460 "11100011" // /* MW 2 */
+ 14461 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38
+.delay_slot
+ 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14463 "10000000" // /* MW 5 */
+ 14464 "11001010" // /* MW 4 */
+ 14465 "11001100" // /* MW 3 */
+ 14466 "00000111" // /* MW 2 */
+ 14467 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14469 "10000000" // /* MW 5 */
+ 14470 "11001010" // /* MW 4 */
+ 14471 "11000000" // /* MW 3 */
+ 14472 "00000111" // /* MW 2 */
+ 14473 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 594 8
+.delay_slot
+ 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14475 "10000000" // /* MW 3 */
+ 14476 "01100001" // /* MW 2 */
+ 14477 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14481 "00000000" // /* MW 15 */
+ 14482 "00000000" // /* MW 14 */
+ 14483 "01111000" // /* MW 13 */
+ 14484 "10100101" // /* MW 12 */
+ 14485 "00000001" // /* MW 11 */
+ 14486 "00000000" // /* MW 10 */
+ 14487 "00000000" // /* MW 9 */
+ 14488 "00000000" // /* MW 8 */
+ 14489 "01011011" // /* MW 7 */
+ 14490 "00000001" // /* MW 6 */
+ 14491 "00100000" // /* MW 5 */
+ 14492 "00000000" // /* MW 4 */
+ 14493 "11110000" // /* MW 3 */
+ 14494 "00101100" // /* MW 2 */
+ 14495 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 595 38 first
+.return_address
+ 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14497 "00010000" // /* MW 9 */
+ 14498 "00000000" // /* MW 8 */
+ 14499 "00001011" // /* MW 7 */
+ 14500 "11110010" // /* MW 6 */
+ 14501 "00000001" // /* MW 5 */
+ 14502 "00000000" // /* MW 4 */
+ 14503 "11010000" // /* MW 3 */
+ 14504 "11000110" // /* MW 2 */
+ 14505 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14507 "00100000" // /* MW 5 */
+ 14508 "00000000" // /* MW 4 */
+ 14509 "00100000" // /* MW 3 */
+ 14510 "11100011" // /* MW 2 */
+ 14511 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272
+ 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14513 "00000000" // /* MW 1 */
+ 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */
+ 14515 "00000000" // /* MW 5 */
+ 14516 "00000000" // /* MW 4 */
+ 14517 "10000000" // /* MW 3 */
+ 14518 "00011100" // /* MW 2 */
+ 14519 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14521 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14523 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14525 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14527 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14529 "00000000" // /* MW 15 */
+ 14530 "00000000" // /* MW 14 */
+ 14531 "01111000" // /* MW 13 */
+ 14532 "01100000" // /* MW 12 */
+ 14533 "10110110" // /* MW 11 */
+ 14534 "00000000" // /* MW 10 */
+ 14535 "00000000" // /* MW 9 */
+ 14536 "00000000" // /* MW 8 */
+ 14537 "01011011" // /* MW 7 */
+ 14538 "00000001" // /* MW 6 */
+ 14539 "00100000" // /* MW 5 */
+ 14540 "00000000" // /* MW 4 */
+ 14541 "11110000" // /* MW 3 */
+ 14542 "00101100" // /* MW 2 */
+ 14543 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304
+.src_ref 7 "superkernels.cpp" 599 8 first
+.no_stack_arguments
+ 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */
+ 14545 "01000001" // /* MW 9 */
+ 14546 "00000000" // /* MW 8 */
+ 14547 "00000000" // /* MW 7 */
+ 14548 "10000100" // /* MW 6 */
+ 14549 "00000101" // /* MW 5 */
+ 14550 "00000000" // /* MW 4 */
+ 14551 "10110000" // /* MW 3 */
+ 14552 "11100011" // /* MW 2 */
+ 14553 "11111100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38
+.delay_slot
+ 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14555 "00000000" // /* MW 5 */
+ 14556 "11001011" // /* MW 4 */
+ 14557 "11001100" // /* MW 3 */
+ 14558 "00000111" // /* MW 2 */
+ 14559 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14561 "00000000" // /* MW 5 */
+ 14562 "11001011" // /* MW 4 */
+ 14563 "11000000" // /* MW 3 */
+ 14564 "00000111" // /* MW 2 */
+ 14565 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 599 8
+.delay_slot
+ 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14567 "10000000" // /* MW 3 */
+ 14568 "01100001" // /* MW 2 */
+ 14569 "00011001" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14571 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14573 "01100111" // /* MW 3 */
+ 14574 "00000001" // /* MW 2 */
+ 14575 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 600 38 first
+.return_address
+ 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14577 "00010000" // /* MW 9 */
+ 14578 "00000000" // /* MW 8 */
+ 14579 "00001011" // /* MW 7 */
+ 14580 "11110010" // /* MW 6 */
+ 14581 "00000001" // /* MW 5 */
+ 14582 "00000000" // /* MW 4 */
+ 14583 "11010000" // /* MW 3 */
+ 14584 "11000110" // /* MW 2 */
+ 14585 "11000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14587 "00100000" // /* MW 5 */
+ 14588 "00000000" // /* MW 4 */
+ 14589 "00100000" // /* MW 3 */
+ 14590 "10010011" // /* MW 2 */
+ 14591 "11111100" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352
+ 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14593 "00000101" // /* MW 3 */
+ 14594 "01101000" // /* MW 2 */
+ 14595 "00011011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 35 first
+.src_ref 7 "superkernels.cpp" 611 18
+ 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14597 "00010000" // /* MW 9 */
+ 14598 "00101000" // /* MW 8 */
+ 14599 "00110010" // /* MW 7 */
+ 14600 "11110011" // /* MW 6 */
+ 14601 "00000001" // /* MW 5 */
+ 14602 "00000000" // /* MW 4 */
+ 14603 "01010000" // /* MW 3 */
+ 14604 "11001101" // /* MW 2 */
+ 14605 "01101111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 18 first
+ 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14607 "01010110" // /* MW 3 */
+ 14608 "00000110" // /* MW 2 */
+ 14609 "00000110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 37 first
+ 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14611 "10111010" // /* MW 3 */
+ 14612 "00011110" // /* MW 2 */
+ 14613 "00000011" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 73
+ 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14615 "00011010" // /* MW 3 */
+ 14616 "00000110" // /* MW 2 */
+ 14617 "00000011" // /* MW 1 */
+ 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14619 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 110
+ 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14621 "10011010" // /* MW 3 */
+ 14622 "00010110" // /* MW 2 */
+ 14623 "00000011" // /* MW 1 */
+ 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14625 "00000000" // /* MW 1 */
+ 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14627 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id first
+ 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14629 "10001000" // /* MW 5 */
+ 14630 "11001000" // /* MW 4 */
+ 14631 "11000000" // /* MW 3 */
+ 14632 "00000111" // /* MW 2 */
+ 14633 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 57
+.aggressive_scheduled_block_id 2
+.noswbrkpt
+ 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14635 "01011111" // /* MW 3 */
+ 14636 "11100111" // /* MW 2 */
+ 14637 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 606 19 first
+.src_ref 7 "superkernels.cpp" 611 16
+.aggressive_scheduled_block_id 2
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14639 "00010001" // /* MW 9 */
+ 14640 "00101110" // /* MW 8 */
+ 14641 "00110010" // /* MW 7 */
+ 14642 "11110001" // /* MW 6 */
+ 14643 "00000001" // /* MW 5 */
+ 14644 "00000000" // /* MW 4 */
+ 14645 "00110000" // /* MW 3 */
+ 14646 "11001110" // /* MW 2 */
+ 14647 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 607 94 first
+ 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14649 "00001111" // /* MW 3 */
+ 14650 "11100001" // /* MW 2 */
+ 14651 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 611 27 first
+ 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14653 "00101111" // /* MW 3 */
+ 14654 "01100011" // /* MW 2 */
+ 14655 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 28 first
+ 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14657 "00001111" // /* MW 3 */
+ 14658 "00100001" // /* MW 2 */
+ 14659 "00010101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13
+.src_ref 7 "superkernels.cpp" 611 16 first
+ 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 14661 "00010000" // /* MW 11 */
+ 14662 "00110000" // /* MW 10 */
+ 14663 "00110010" // /* MW 9 */
+ 14664 "11110011" // /* MW 8 */
+ 14665 "00000001" // /* MW 7 */
+ 14666 "10000000" // /* MW 6 */
+ 14667 "00110001" // /* MW 5 */
+ 14668 "00000110" // /* MW 4 */
+ 14669 "11110010" // /* MW 3 */
+ 14670 "00101100" // /* MW 2 */
+ 14671 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 608 13 first
+ 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14673 "00000000" // /* MW 15 */
+ 14674 "00000000" // /* MW 14 */
+ 14675 "01111000" // /* MW 13 */
+ 14676 "10100101" // /* MW 12 */
+ 14677 "00000001" // /* MW 11 */
+ 14678 "00000000" // /* MW 10 */
+ 14679 "00000000" // /* MW 9 */
+ 14680 "10000000" // /* MW 8 */
+ 14681 "00010001" // /* MW 7 */
+ 14682 "00000110" // /* MW 6 */
+ 14683 "00100110" // /* MW 5 */
+ 14684 "00000000" // /* MW 4 */
+ 14685 "11110000" // /* MW 3 */
+ 14686 "00101100" // /* MW 2 */
+ 14687 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448
+.src_ref 7 "superkernels.cpp" 614 12
+ 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14689 "10010000" // /* MW 5 */
+ 14690 "11001000" // /* MW 4 */
+ 14691 "11000000" // /* MW 3 */
+ 14692 "00000111" // /* MW 2 */
+ 14693 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11
+ 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14695 "00010000" // /* MW 9 */
+ 14696 "00100000" // /* MW 8 */
+ 14697 "00110010" // /* MW 7 */
+ 14698 "11110001" // /* MW 6 */
+ 14699 "00000001" // /* MW 5 */
+ 14700 "00000000" // /* MW 4 */
+ 14701 "11010000" // /* MW 3 */
+ 14702 "11000010" // /* MW 2 */
+ 14703 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13
+.src_ref 7 "superkernels.cpp" 616 11 first
+ 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14705 "00010000" // /* MW 9 */
+ 14706 "00100110" // /* MW 8 */
+ 14707 "00110010" // /* MW 7 */
+ 14708 "11110011" // /* MW 6 */
+ 14709 "00000001" // /* MW 5 */
+ 14710 "00000000" // /* MW 4 */
+ 14711 "11010000" // /* MW 3 */
+ 14712 "11000110" // /* MW 2 */
+ 14713 "01000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+ 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14715 "01010110" // /* MW 3 */
+ 14716 "00000110" // /* MW 2 */
+ 14717 "00000110" // /* MW 1 */
+ 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14719 "00000000" // /* MW 1 */
+ 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14721 "00000000" // /* MW 1 */
+ 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14723 "00000000" // /* MW 1 */
+ 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14725 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 618 6 first
+.src_ref 7 "superkernels.cpp" 618 17 first
+ 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */
+ 14727 "00000001" // /* MW 5 */
+ 14728 "01000000" // /* MW 4 */
+ 14729 "11111000" // /* MW 3 */
+ 14730 "00011100" // /* MW 2 */
+ 14731 "10000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14733 "00000001" // /* MW 5 */
+ 14734 "10110000" // /* MW 4 */
+ 14735 "11101001" // /* MW 3 */
+ 14736 "01000000" // /* MW 2 */
+ 14737 "10001100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14739 "00000111" // /* MW 3 */
+ 14740 "10100100" // /* MW 2 */
+ 14741 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 616 11 first
+.delay_slot
+ 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14743 "00110001" // /* MW 3 */
+ 14744 "00000110" // /* MW 2 */
+ 14745 "00001010" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 615 13 first
+.delay_slot
+ 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14747 "01010001" // /* MW 3 */
+ 14748 "00000110" // /* MW 2 */
+ 14749 "00001110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 614 12 first
+.delay_slot
+ 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14751 "01110001" // /* MW 3 */
+ 14752 "00000110" // /* MW 2 */
+ 14753 "00001000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14755 "00110001" // /* MW 3 */
+ 14756 "11110110" // /* MW 2 */
+ 14757 "00000111" // /* MW 1 */
+ 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14759 "00000000" // /* MW 1 */
+ 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14761 "00000000" // /* MW 1 */
+ 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14763 "00000000" // /* MW 1 */
+ 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14765 "00000000" // /* MW 1 */
+ 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14767 "00000000" // /* MW 1 */
+ 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14769 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49 first
+ 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14771 "10000110" // /* MW 3 */
+ 14772 "01101000" // /* MW 2 */
+ 14773 "00011110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14775 "01110110" // /* MW 3 */
+ 14776 "11111111" // /* MW 2 */
+ 14777 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14779 "00110110" // /* MW 3 */
+ 14780 "11111110" // /* MW 2 */
+ 14781 "00000110" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14783 "01010110" // /* MW 3 */
+ 14784 "11111110" // /* MW 2 */
+ 14785 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id first
+ 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14787 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 28 first
+.aggressive_scheduled_block_id 3
+.noswbrkpt
+ 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14789 "00110110" // /* MW 3 */
+ 14790 "01000110" // /* MW 2 */
+ 14791 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14793 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14795 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14797 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14799 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+.aggressive_scheduled_block_id 3
+.nohwbrkpt
+.noswbrkpt
+ 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14801 "00010010" // /* MW 3 */
+ 14802 "10100011" // /* MW 2 */
+ 14803 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 20
+.src_ref 1 "io_buffer_main.h" 395 8
+.aggressive_scheduled_block_id 3
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14805 "11111010" // /* MW 5 */
+ 14806 "11000001" // /* MW 4 */
+ 14807 "00111111" // /* MW 3 */
+ 14808 "11000110" // /* MW 2 */
+ 14809 "11000000" // /* MW 1 */
+ 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14811 "00000000" // /* MW 1 */
+ 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14813 "00000000" // /* MW 1 */
+ 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14815 "00000000" // /* MW 1 */
+ 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14817 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */
+ 14819 "01100000" // /* MW 13 */
+ 14820 "00101011" // /* MW 12 */
+ 14821 "00000000" // /* MW 11 */
+ 14822 "10101111" // /* MW 10 */
+ 14823 "00110100" // /* MW 9 */
+ 14824 "00000000" // /* MW 8 */
+ 14825 "00001000" // /* MW 7 */
+ 14826 "01010011" // /* MW 6 */
+ 14827 "00100100" // /* MW 5 */
+ 14828 "00000000" // /* MW 4 */
+ 14829 "11110000" // /* MW 3 */
+ 14830 "00101100" // /* MW 2 */
+ 14831 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592
+ 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14833 "00000000" // /* MW 1 */
+ 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14835 "00000000" // /* MW 1 */
+ 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14837 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.src_ref 1 "io_buffer_main.h" 125 25
+ 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14839 "00011001" // /* MW 3 */
+ 14840 "11110101" // /* MW 2 */
+ 14841 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+ 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14843 "00011001" // /* MW 3 */
+ 14844 "11101000" // /* MW 2 */
+ 14845 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2 first
+.no_stack_arguments
+ 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */
+ 14847 "00000001" // /* MW 5 */
+ 14848 "00000000" // /* MW 4 */
+ 14849 "10111000" // /* MW 3 */
+ 14850 "00001000" // /* MW 2 */
+ 14851 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 621 2
+.delay_slot
+ 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14853 "00000000" // /* MW 5 */
+ 14854 "11001100" // /* MW 4 */
+ 14855 "11000110" // /* MW 3 */
+ 14856 "00000111" // /* MW 2 */
+ 14857 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14859 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14861 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14863 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25
+.delay_slot
+ 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 14865 "00000000" // /* MW 15 */
+ 14866 "00000000" // /* MW 14 */
+ 14867 "01111000" // /* MW 13 */
+ 14868 "10100101" // /* MW 12 */
+ 14869 "00000001" // /* MW 11 */
+ 14870 "00000000" // /* MW 10 */
+ 14871 "00000000" // /* MW 9 */
+ 14872 "00000000" // /* MW 8 */
+ 14873 "10001011" // /* MW 7 */
+ 14874 "10001000" // /* MW 6 */
+ 14875 "00100110" // /* MW 5 */
+ 14876 "00000000" // /* MW 4 */
+ 14877 "11110000" // /* MW 3 */
+ 14878 "00101100" // /* MW 2 */
+ 14879 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+.src_ref 1 "io_buffer_main.h" 218 49
+.return_address
+ 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14881 "00010000" // /* MW 9 */
+ 14882 "00100100" // /* MW 8 */
+ 14883 "10110010" // /* MW 7 */
+ 14884 "11110000" // /* MW 6 */
+ 14885 "00000001" // /* MW 5 */
+ 14886 "00000000" // /* MW 4 */
+ 14887 "00100000" // /* MW 3 */
+ 14888 "01000010" // /* MW 2 */
+ 14889 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6 first
+.src_ref 7 "superkernels.cpp" 623 20
+ 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14891 "00010000" // /* MW 9 */
+ 14892 "00100010" // /* MW 8 */
+ 14893 "10110010" // /* MW 7 */
+ 14894 "11110000" // /* MW 6 */
+ 14895 "00000001" // /* MW 5 */
+ 14896 "00000000" // /* MW 4 */
+ 14897 "11010000" // /* MW 3 */
+ 14898 "11000110" // /* MW 2 */
+ 14899 "00100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 20
+ 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14901 "01010110" // /* MW 3 */
+ 14902 "00000110" // /* MW 2 */
+ 14903 "00000001" // /* MW 1 */
+ 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14905 "00000000" // /* MW 1 */
+ 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14907 "00000000" // /* MW 1 */
+ 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14909 "00000000" // /* MW 1 */
+ 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14911 "00000000" // /* MW 1 */
+ 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14913 "00000000" // /* MW 1 */
+ 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14915 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 17
+ 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14917 "00101000" // /* MW 3 */
+ 14918 "01100011" // /* MW 2 */
+ 14919 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 623 6
+ 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */
+ 14921 "00000001" // /* MW 5 */
+ 14922 "01000000" // /* MW 4 */
+ 14923 "11010000" // /* MW 3 */
+ 14924 "00011101" // /* MW 2 */
+ 14925 "10001000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14927 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14929 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14931 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14933 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14935 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34
+.src_ref 1 "io_buffer_main.h" 218 49 first
+.src_ref 1 "io_buffer_main.h" 395 8
+ 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14937 "00001000" // /* MW 9 */
+ 14938 "00000011" // /* MW 8 */
+ 14939 "10110100" // /* MW 7 */
+ 14940 "11101000" // /* MW 6 */
+ 14941 "00010111" // /* MW 5 */
+ 14942 "00111111" // /* MW 4 */
+ 14943 "10000000" // /* MW 3 */
+ 14944 "00000010" // /* MW 2 */
+ 14945 "00001000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52
+.src_ref 1 "io_buffer_main.h" 218 49
+ 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 14947 "00010000" // /* MW 9 */
+ 14948 "00101110" // /* MW 8 */
+ 14949 "00110010" // /* MW 7 */
+ 14950 "11110000" // /* MW 6 */
+ 14951 "00000001" // /* MW 5 */
+ 14952 "00000000" // /* MW 4 */
+ 14953 "11010000" // /* MW 3 */
+ 14954 "11101110" // /* MW 2 */
+ 14955 "00111111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 64
+ 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14957 "01010110" // /* MW 3 */
+ 14958 "11111110" // /* MW 2 */
+ 14959 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 80
+ 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14961 "01110110" // /* MW 3 */
+ 14962 "11111110" // /* MW 2 */
+ 14963 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 235 47 first
+ 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14965 "10010110" // /* MW 3 */
+ 14966 "01010110" // /* MW 2 */
+ 14967 "00000001" // /* MW 1 */
+ 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14969 "00000000" // /* MW 1 */
+ 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14971 "00000000" // /* MW 1 */
+ 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14973 "00000000" // /* MW 1 */
+ 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14975 "00000000" // /* MW 1 */
+ 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14977 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 218 36 first
+.src_ref 1 "io_buffer_main.h" 218 43 first
+ 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14979 "00100010" // /* MW 3 */
+ 14980 "11100101" // /* MW 2 */
+ 14981 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50
+.src_ref 7 "superkernels.cpp" 630 3
+.src_ref 1 "io_buffer_main.h" 218 20
+ 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 14983 "00001010" // /* MW 5 */
+ 14984 "01000000" // /* MW 4 */
+ 14985 "00110000" // /* MW 3 */
+ 14986 "11001010" // /* MW 2 */
+ 14987 "00100000" // /* MW 1 */
+ 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14989 "00000000" // /* MW 1 */
+ 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14991 "00000000" // /* MW 1 */
+ 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14993 "00000000" // /* MW 1 */
+ 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 14995 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 395 8 first
+ 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 14997 "00011000" // /* MW 3 */
+ 14998 "00010011" // /* MW 2 */
+ 14999 "00010101" // /* MW 1 */
+ 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15001 "00000000" // /* MW 1 */
+ 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15003 "00000000" // /* MW 1 */
+ 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15005 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 52 first
+ 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15007 "01110110" // /* MW 3 */
+ 15008 "00000110" // /* MW 2 */
+ 15009 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 629 34 first
+ 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15011 "01010110" // /* MW 3 */
+ 15012 "00000010" // /* MW 2 */
+ 15013 "00000111" // /* MW 1 */
+ 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15015 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id first
+ 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15017 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 125 25 first
+.aggressive_scheduled_block_id 4
+.noswbrkpt
+ 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15019 "00011110" // /* MW 3 */
+ 15020 "01011100" // /* MW 2 */
+ 15021 "00000110" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15023 "00000000" // /* MW 1 */
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15025 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.src_ref 1 "io_buffer_main.h" 125 25
+.src_ref 1 "io_buffer_main.h" 324 32
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15027 "01111000" // /* MW 9 */
+ 15028 "01100000" // /* MW 8 */
+ 15029 "00110001" // /* MW 7 */
+ 15030 "01101100" // /* MW 6 */
+ 15031 "00111000" // /* MW 5 */
+ 15032 "00100111" // /* MW 4 */
+ 15033 "11010000" // /* MW 3 */
+ 15034 "11000110" // /* MW 2 */
+ 15035 "00101001" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15037 "00000111" // /* MW 3 */
+ 15038 "10100001" // /* MW 2 */
+ 15039 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3
+.aggressive_scheduled_block_id 4
+.nohwbrkpt
+.noswbrkpt
+ 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */
+ 15041 "00000001" // /* MW 5 */
+ 15042 "01000000" // /* MW 4 */
+ 15043 "10001000" // /* MW 3 */
+ 15044 "00011101" // /* MW 2 */
+ 15045 "10000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 32
+.delay_slot
+.aggressive_scheduled_block_id 4
+.aggressive_scheduled_block_id last
+.nohwbrkpt
+.noswbrkpt
+ 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15047 "10001011" // /* MW 3 */
+ 15048 "10000000" // /* MW 2 */
+ 15049 "00001111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15051 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15053 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15055 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 626 50 first
+.delay_slot
+ 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */
+ 15057 "10100000" // /* MW 7 */
+ 15058 "11100010" // /* MW 6 */
+ 15059 "10110100" // /* MW 5 */
+ 15060 "00000000" // /* MW 4 */
+ 15061 "10110000" // /* MW 3 */
+ 15062 "00010011" // /* MW 2 */
+ 15063 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 630 3 first
+ 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */
+ 15065 "00000001" // /* MW 5 */
+ 15066 "01000000" // /* MW 4 */
+ 15067 "10011000" // /* MW 3 */
+ 15068 "00011101" // /* MW 2 */
+ 15069 "10010000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15071 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15073 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15075 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15077 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15079 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8 first
+.no_stack_arguments
+ 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */
+ 15081 "00000001" // /* MW 5 */
+ 15082 "00000000" // /* MW 4 */
+ 15083 "11111000" // /* MW 3 */
+ 15084 "00010101" // /* MW 2 */
+ 15085 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15087 "10000000" // /* MW 5 */
+ 15088 "11001010" // /* MW 4 */
+ 15089 "11000110" // /* MW 3 */
+ 15090 "00000111" // /* MW 2 */
+ 15091 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15093 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15095 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15097 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 633 8
+.delay_slot
+ 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15099 "10000001" // /* MW 5 */
+ 15100 "11000001" // /* MW 4 */
+ 15101 "11110100" // /* MW 3 */
+ 15102 "00101100" // /* MW 2 */
+ 15103 "00000000" // /* MW 1 */
+.return_address
+ 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */
+ 15105 "00000000" // /* MW 5 */
+ 15106 "00000000" // /* MW 4 */
+ 15107 "10011000" // /* MW 3 */
+ 15108 "00011101" // /* MW 2 */
+ 15109 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15111 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15113 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15115 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15117 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15119 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880
+.src_ref 7 "superkernels.cpp" 637 8 first
+.no_stack_arguments
+ 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */
+ 15121 "00000001" // /* MW 5 */
+ 15122 "00000000" // /* MW 4 */
+ 15123 "01011000" // /* MW 3 */
+ 15124 "00010110" // /* MW 2 */
+ 15125 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15127 "00000000" // /* MW 5 */
+ 15128 "11001011" // /* MW 4 */
+ 15129 "11000110" // /* MW 3 */
+ 15130 "00000111" // /* MW 2 */
+ 15131 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 637 8
+.delay_slot
+ 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15133 "11000000" // /* MW 3 */
+ 15134 "01100000" // /* MW 2 */
+ 15135 "00011010" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15137 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15139 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */
+ 15141 "10000001" // /* MW 11 */
+ 15142 "10101101" // /* MW 10 */
+ 15143 "00000000" // /* MW 9 */
+ 15144 "00000000" // /* MW 8 */
+ 15145 "00000000" // /* MW 7 */
+ 15146 "00000000" // /* MW 6 */
+ 15147 "00100000" // /* MW 5 */
+ 15148 "00000000" // /* MW 4 */
+ 15149 "11110000" // /* MW 3 */
+ 15150 "00101100" // /* MW 2 */
+ 15151 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912
+.src_ref 1 "io_buffer_main.h" 327 28
+.src_ref 1 "io_buffer_main.h" 327 40
+.return_address
+ 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15153 "10011001" // /* MW 3 */
+ 15154 "11110000" // /* MW 2 */
+ 15155 "00000111" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 327 32
+.src_ref 1 "io_buffer_main.h" 425 8
+.src_ref 1 "io_buffer_main.h" 425 8
+ 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15157 "00001010" // /* MW 5 */
+ 15158 "01000100" // /* MW 4 */
+ 15159 "00100000" // /* MW 3 */
+ 15160 "10000011" // /* MW 2 */
+ 15161 "11111110" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 1 "io_buffer_main.h" 324 32 first
+ 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15163 "00010000" // /* MW 9 */
+ 15164 "00100100" // /* MW 8 */
+ 15165 "10110010" // /* MW 7 */
+ 15166 "11110011" // /* MW 6 */
+ 15167 "00000001" // /* MW 5 */
+ 15168 "00000000" // /* MW 4 */
+ 15169 "11010000" // /* MW 3 */
+ 15170 "11000010" // /* MW 2 */
+ 15171 "11101000" // /* MW 1 */
+ 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15173 "00000000" // /* MW 1 */
+ 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15175 "00000000" // /* MW 1 */
+ 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15177 "00000000" // /* MW 1 */
+ 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15179 "00000000" // /* MW 1 */
+ 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15181 "00000000" // /* MW 1 */
+ 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15183 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15185 "00011000" // /* MW 3 */
+ 15186 "00010001" // /* MW 2 */
+ 15187 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15189 "01010110" // /* MW 3 */
+ 15190 "11110110" // /* MW 2 */
+ 15191 "00000001" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 324 51 first
+ 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15193 "00010110" // /* MW 3 */
+ 15194 "01010110" // /* MW 2 */
+ 15195 "00000000" // /* MW 1 */
+ 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15197 "00000000" // /* MW 1 */
+ 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15199 "00000000" // /* MW 1 */
+ 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15201 "00000000" // /* MW 1 */
+ 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15203 "00000000" // /* MW 1 */
+ 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15205 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 32 first
+ 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15207 "00100001" // /* MW 3 */
+ 15208 "01100101" // /* MW 2 */
+ 15209 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28
+ 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15211 "01010001" // /* MW 3 */
+ 15212 "11110110" // /* MW 2 */
+ 15213 "00001001" // /* MW 1 */
+ 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15215 "00000000" // /* MW 1 */
+ 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15217 "00000000" // /* MW 1 */
+ 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15219 "00000000" // /* MW 1 */
+ 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15221 "00000000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 425 8 first
+ 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15223 "00011000" // /* MW 3 */
+ 15224 "00010001" // /* MW 2 */
+ 15225 "00010100" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 40 first
+ 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15227 "01010110" // /* MW 3 */
+ 15228 "11100110" // /* MW 2 */
+ 15229 "00000110" // /* MW 1 */
+ 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15231 "00000000" // /* MW 1 */
+ 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15233 "00000000" // /* MW 1 */
+ 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */
+ 15235 "00000000" // /* MW 5 */
+ 15236 "00000000" // /* MW 4 */
+ 15237 "11011000" // /* MW 3 */
+ 15238 "00011101" // /* MW 2 */
+ 15239 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15241 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15243 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15
+.src_ref 7 "superkernels.cpp" 649 14
+.delay_slot
+ 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15245 "00000001" // /* MW 3 */
+ 15246 "00100000" // /* MW 2 */
+ 15247 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 645 15 first
+.src_ref 1 "io_buffer_main.h" 327 32
+.delay_slot
+ 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15249 "01000011" // /* MW 5 */
+ 15250 "11000110" // /* MW 4 */
+ 15251 "00111000" // /* MW 3 */
+ 15252 "11000010" // /* MW 2 */
+ 15253 "11100000" // /* MW 1 */
+.src_ref 1 "io_buffer_main.h" 327 28 first
+.delay_slot
+ 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15255 "00000000" // /* MW 9 */
+ 15256 "00000000" // /* MW 8 */
+ 15257 "00000000" // /* MW 7 */
+ 15258 "10000000" // /* MW 6 */
+ 15259 "00110001" // /* MW 5 */
+ 15260 "11100110" // /* MW 4 */
+ 15261 "11110110" // /* MW 3 */
+ 15262 "00101100" // /* MW 2 */
+ 15263 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024
+.src_ref 7 "superkernels.cpp" 649 14
+ 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */
+ 15265 "00000000" // /* MW 15 */
+ 15266 "00000000" // /* MW 14 */
+ 15267 "01111000" // /* MW 13 */
+ 15268 "10100101" // /* MW 12 */
+ 15269 "00000001" // /* MW 11 */
+ 15270 "00001000" // /* MW 10 */
+ 15271 "00000000" // /* MW 9 */
+ 15272 "00000001" // /* MW 8 */
+ 15273 "01011011" // /* MW 7 */
+ 15274 "00000001" // /* MW 6 */
+ 15275 "00100000" // /* MW 5 */
+ 15276 "00000000" // /* MW 4 */
+ 15277 "11110000" // /* MW 3 */
+ 15278 "00101100" // /* MW 2 */
+ 15279 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040
+.src_ref 7 "superkernels.cpp" 648 19
+.src_ref 7 "superkernels.cpp" 651
+ 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15281 "00010000" // /* MW 9 */
+ 15282 "00110000" // /* MW 8 */
+ 15283 "10110010" // /* MW 7 */
+ 15284 "11110011" // /* MW 6 */
+ 15285 "00000001" // /* MW 5 */
+ 15286 "00000000" // /* MW 4 */
+ 15287 "00100000" // /* MW 3 */
+ 15288 "10000111" // /* MW 2 */
+ 15289 "11111101" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+.src_ref 7 "superkernels.cpp" 648 19 first
+.src_ref 7 "superkernels.cpp" 649 14
+ 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */
+ 15291 "00010000" // /* MW 9 */
+ 15292 "00100000" // /* MW 8 */
+ 15293 "00110010" // /* MW 7 */
+ 15294 "11110011" // /* MW 6 */
+ 15295 "00000001" // /* MW 5 */
+ 15296 "00000000" // /* MW 4 */
+ 15297 "11010000" // /* MW 3 */
+ 15298 "11001010" // /* MW 2 */
+ 15299 "11100000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15301 "00110110" // /* MW 3 */
+ 15302 "00000110" // /* MW 2 */
+ 15303 "00000110" // /* MW 1 */
+ 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15305 "00000000" // /* MW 1 */
+ 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15307 "00000000" // /* MW 1 */
+ 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15309 "00000000" // /* MW 1 */
+ 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15311 "00000000" // /* MW 1 */
+ 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15313 "00000000" // /* MW 1 */
+ 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15315 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 16
+ 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15317 "00101000" // /* MW 3 */
+ 15318 "01100011" // /* MW 2 */
+ 15319 "00010100" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 648 6
+ 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */
+ 15321 "00000001" // /* MW 5 */
+ 15322 "01000000" // /* MW 4 */
+ 15323 "11111000" // /* MW 3 */
+ 15324 "00011101" // /* MW 2 */
+ 15325 "10001000" // /* MW 1 */
+.delay_slot
+ 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15327 "10011001" // /* MW 3 */
+ 15328 "11111011" // /* MW 2 */
+ 15329 "00000111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15331 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15333 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15335 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15337 "00000000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 649 14 first
+ 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15339 "00100011" // /* MW 5 */
+ 15340 "00001100" // /* MW 4 */
+ 15341 "11111100" // /* MW 3 */
+ 15342 "00101100" // /* MW 2 */
+ 15343 "00000000" // /* MW 1 */
+.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104
+ 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15345 "00011001" // /* MW 3 */
+ 15346 "11111111" // /* MW 2 */
+ 15347 "00000111" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651 first
+ 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15349 "00000000" // /* MW 3 */
+ 15350 "00101000" // /* MW 2 */
+ 15351 "00010000" // /* MW 1 */
+.src_ref 7 "superkernels.cpp" 651
+.delay_slot
+ 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15353 "00000001" // /* MW 5 */
+ 15354 "00000000" // /* MW 4 */
+ 15355 "00000000" // /* MW 3 */
+ 15356 "11111000" // /* MW 2 */
+ 15357 "11111111" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15359 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15361 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15363 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end
+.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0
+ 15365 "00000000" // /* MW 1 */
+.label __Z13_b896_wrapperPPv___func_begin0
+.label _Z13_b896_wrapperPPv
+.function _b896_wrapper _Z13_b896_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 21 first
+.src_ref 0 "0_0_reloadable5.cc" 23 79
+.function_start
+ 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15377 "11000000" // /* MW 3 */
+ 15378 "01100000" // /* MW 2 */
+ 15379 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 23 79 first
+ 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15381 "00011110" // /* MW 3 */
+ 15382 "00011100" // /* MW 2 */
+ 15383 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 24 79 first
+ 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15385 "10011110" // /* MW 3 */
+ 15386 "00101100" // /* MW 2 */
+ 15387 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 26 81 first
+ 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15389 "10011110" // /* MW 3 */
+ 15390 "11110101" // /* MW 2 */
+ 15391 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 25 47 first
+ 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15393 "00011110" // /* MW 3 */
+ 15394 "00000101" // /* MW 2 */
+ 15395 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 22 4 first
+.tail_call
+ 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */
+ 15397 "00000000" // /* MW 5 */
+ 15398 "00000000" // /* MW 4 */
+ 15399 "01110000" // /* MW 3 */
+ 15400 "00001101" // /* MW 2 */
+ 15401 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15403 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15405 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15407 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15409 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b896_wrapperPPv__end
+.label __Z13_b896_wrapperPPv___func_end0
+ 15411 "00000000" // /* MW 1 */
+.label __Z13_b901_wrapperPPv___func_begin0
+.label _Z13_b901_wrapperPPv
+.function _b901_wrapper _Z13_b901_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 30 first
+.src_ref 0 "0_0_reloadable5.cc" 32 79
+.function_start
+ 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15425 "11000000" // /* MW 3 */
+ 15426 "01100000" // /* MW 2 */
+ 15427 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 32 79 first
+ 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15429 "00011110" // /* MW 3 */
+ 15430 "00101100" // /* MW 2 */
+ 15431 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 34 81 first
+ 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15433 "00011110" // /* MW 3 */
+ 15434 "11110101" // /* MW 2 */
+ 15435 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 33 47 first
+ 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15437 "10011110" // /* MW 3 */
+ 15438 "00000100" // /* MW 2 */
+ 15439 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 31 4 first
+.tail_call
+ 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */
+ 15441 "00000000" // /* MW 5 */
+ 15442 "00000000" // /* MW 4 */
+ 15443 "00011000" // /* MW 3 */
+ 15444 "00010000" // /* MW 2 */
+ 15445 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15447 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15449 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15451 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15453 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b901_wrapperPPv__end
+.label __Z13_b901_wrapperPPv___func_end0
+ 15455 "00000000" // /* MW 1 */
+.label __Z13_b906_wrapperPPv___func_begin0
+.label _Z13_b906_wrapperPPv
+.function _b906_wrapper _Z13_b906_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 38 first
+.src_ref 0 "0_0_reloadable5.cc" 40 79
+.function_start
+ 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15457 "11000000" // /* MW 3 */
+ 15458 "01100000" // /* MW 2 */
+ 15459 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 40 79 first
+ 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15461 "00011110" // /* MW 3 */
+ 15462 "00101100" // /* MW 2 */
+ 15463 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 42 81 first
+ 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15465 "00011110" // /* MW 3 */
+ 15466 "11110101" // /* MW 2 */
+ 15467 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 41 47 first
+ 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15469 "10011110" // /* MW 3 */
+ 15470 "00000100" // /* MW 2 */
+ 15471 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 39 4 first
+.tail_call
+ 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */
+ 15473 "00000000" // /* MW 5 */
+ 15474 "00000000" // /* MW 4 */
+ 15475 "11001000" // /* MW 3 */
+ 15476 "00010001" // /* MW 2 */
+ 15477 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15479 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15481 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15483 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15485 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b906_wrapperPPv__end
+.label __Z13_b906_wrapperPPv___func_end0
+ 15487 "00000000" // /* MW 1 */
+.label __Z13_b881_wrapperPPv___func_begin0
+.label _Z13_b881_wrapperPPv
+.function _b881_wrapper _Z13_b881_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 46 first
+.src_ref 0 "0_0_reloadable5.cc" 48 79
+.function_start
+ 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15489 "11000000" // /* MW 3 */
+ 15490 "01100000" // /* MW 2 */
+ 15491 "00011001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 48 79 first
+ 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15493 "00011110" // /* MW 3 */
+ 15494 "00101100" // /* MW 2 */
+ 15495 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 50 81 first
+ 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15497 "00011110" // /* MW 3 */
+ 15498 "11110101" // /* MW 2 */
+ 15499 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 49 47 first
+ 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15501 "10011110" // /* MW 3 */
+ 15502 "00000100" // /* MW 2 */
+ 15503 "00000001" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 47 4 first
+.tail_call
+ 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */
+ 15505 "00000000" // /* MW 5 */
+ 15506 "00000000" // /* MW 4 */
+ 15507 "10001000" // /* MW 3 */
+ 15508 "00010100" // /* MW 2 */
+ 15509 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15511 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15513 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15515 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15517 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b881_wrapperPPv__end
+.label __Z13_b881_wrapperPPv___func_end0
+ 15519 "00000000" // /* MW 1 */
+.label __Z13_b891_wrapperPPv___func_begin0
+.label _Z13_b891_wrapperPPv
+.function _b891_wrapper _Z13_b891_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 54 first
+.src_ref 0 "0_0_reloadable5.cc" 56 79
+.function_start
+ 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15521 "11000000" // /* MW 3 */
+ 15522 "01100000" // /* MW 2 */
+ 15523 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 56 79 first
+ 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15525 "00011110" // /* MW 3 */
+ 15526 "00111100" // /* MW 2 */
+ 15527 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 57 47 first
+ 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15529 "10011110" // /* MW 3 */
+ 15530 "11101100" // /* MW 2 */
+ 15531 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 59 81 first
+ 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15533 "10011110" // /* MW 3 */
+ 15534 "00010101" // /* MW 2 */
+ 15535 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 58 80 first
+ 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15537 "00011110" // /* MW 3 */
+ 15538 "00000101" // /* MW 2 */
+ 15539 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 55 4 first
+.tail_call
+ 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */
+ 15541 "00000000" // /* MW 5 */
+ 15542 "00000000" // /* MW 4 */
+ 15543 "11110000" // /* MW 3 */
+ 15544 "00010110" // /* MW 2 */
+ 15545 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15547 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15549 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15551 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15553 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b891_wrapperPPv__end
+.label __Z13_b891_wrapperPPv___func_end0
+ 15555 "00000000" // /* MW 1 */
+.label __Z13_b924_wrapperPPv___func_begin0
+.label _Z13_b924_wrapperPPv
+.function _b924_wrapper _Z13_b924_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 63 first
+.src_ref 0 "0_0_reloadable5.cc" 65 79
+.function_start
+ 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15569 "11000000" // /* MW 3 */
+ 15570 "01100000" // /* MW 2 */
+ 15571 "00011011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 65 79 first
+ 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15573 "00011110" // /* MW 3 */
+ 15574 "00011100" // /* MW 2 */
+ 15575 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 66 79 first
+ 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15577 "10011110" // /* MW 3 */
+ 15578 "00011100" // /* MW 2 */
+ 15579 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 67 80 first
+ 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15581 "00011110" // /* MW 3 */
+ 15582 "00101101" // /* MW 2 */
+ 15583 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 69 81 first
+ 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15585 "00011110" // /* MW 3 */
+ 15586 "11110110" // /* MW 2 */
+ 15587 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 68 47 first
+ 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15589 "10011110" // /* MW 3 */
+ 15590 "00000101" // /* MW 2 */
+ 15591 "00000011" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 64 4 first
+.tail_call
+ 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */
+ 15593 "00000000" // /* MW 5 */
+ 15594 "00000000" // /* MW 4 */
+ 15595 "11010000" // /* MW 3 */
+ 15596 "00011011" // /* MW 2 */
+ 15597 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15599 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15601 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15603 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15605 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b924_wrapperPPv__end
+.label __Z13_b924_wrapperPPv___func_end0
+ 15607 "00000000" // /* MW 1 */
+.label __Z13_b919_wrapperPPv___func_begin0
+.label _Z13_b919_wrapperPPv
+.function _b919_wrapper _Z13_b919_wrapperPPv
+.src_ref 0 "0_0_reloadable5.cc" 73 first
+.src_ref 0 "0_0_reloadable5.cc" 75 79
+.function_start
+ 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15617 "11000000" // /* MW 3 */
+ 15618 "01100000" // /* MW 2 */
+ 15619 "00011010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 75 79 first
+ 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15621 "00011110" // /* MW 3 */
+ 15622 "00011100" // /* MW 2 */
+ 15623 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 76 79 first
+ 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15625 "10011110" // /* MW 3 */
+ 15626 "00101100" // /* MW 2 */
+ 15627 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 78 81 first
+ 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15629 "10011110" // /* MW 3 */
+ 15630 "11110101" // /* MW 2 */
+ 15631 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 77 47 first
+ 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15633 "00011110" // /* MW 3 */
+ 15634 "00000101" // /* MW 2 */
+ 15635 "00000010" // /* MW 1 */
+.src_ref 0 "0_0_reloadable5.cc" 74 4 first
+.tail_call
+ 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */
+ 15637 "00000000" // /* MW 5 */
+ 15638 "00000000" // /* MW 4 */
+ 15639 "11100000" // /* MW 3 */
+ 15640 "00011010" // /* MW 2 */
+ 15641 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15643 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15645 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15647 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+ 15649 "00000000" // /* MW 1 */
+.delay_slot
+.swstall delay_slot
+ 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */
+.label _Z13_b919_wrapperPPv__end
+.label __Z13_b919_wrapperPPv___func_end0
+ 15651 "00000000" // /* MW 1 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0_
+.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 108 19
+.src_ref 10 "me_div.c" 115 4 first
+.function_start
+ 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */
+ 15665 "01000001" // /* MW 5 */
+ 15666 "10100000" // /* MW 4 */
+ 15667 "00101111" // /* MW 3 */
+ 15668 "11000000" // /* MW 2 */
+ 15669 "00000000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15671 "00011100" // /* MW 3 */
+ 15672 "11000110" // /* MW 2 */
+ 15673 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15675 "00011100" // /* MW 3 */
+ 15676 "11000110" // /* MW 2 */
+ 15677 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15679 "00011100" // /* MW 3 */
+ 15680 "11000110" // /* MW 2 */
+ 15681 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15683 "00011100" // /* MW 3 */
+ 15684 "11000110" // /* MW 2 */
+ 15685 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15687 "00011100" // /* MW 3 */
+ 15688 "11000110" // /* MW 2 */
+ 15689 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15691 "00011100" // /* MW 3 */
+ 15692 "11000110" // /* MW 2 */
+ 15693 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15695 "00011100" // /* MW 3 */
+ 15696 "11000110" // /* MW 2 */
+ 15697 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15699 "00011100" // /* MW 3 */
+ 15700 "11000110" // /* MW 2 */
+ 15701 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15703 "00011100" // /* MW 3 */
+ 15704 "11000110" // /* MW 2 */
+ 15705 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15707 "00011100" // /* MW 3 */
+ 15708 "11000110" // /* MW 2 */
+ 15709 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15711 "00011100" // /* MW 3 */
+ 15712 "11000110" // /* MW 2 */
+ 15713 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15715 "00011100" // /* MW 3 */
+ 15716 "11000110" // /* MW 2 */
+ 15717 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15719 "00011100" // /* MW 3 */
+ 15720 "11000110" // /* MW 2 */
+ 15721 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15723 "00011100" // /* MW 3 */
+ 15724 "11000110" // /* MW 2 */
+ 15725 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15727 "00011100" // /* MW 3 */
+ 15728 "11000110" // /* MW 2 */
+ 15729 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15731 "00011100" // /* MW 3 */
+ 15732 "11000110" // /* MW 2 */
+ 15733 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15735 "00011100" // /* MW 3 */
+ 15736 "11000110" // /* MW 2 */
+ 15737 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15739 "00011100" // /* MW 3 */
+ 15740 "11000110" // /* MW 2 */
+ 15741 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15743 "00011100" // /* MW 3 */
+ 15744 "11000110" // /* MW 2 */
+ 15745 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15747 "00011100" // /* MW 3 */
+ 15748 "11000110" // /* MW 2 */
+ 15749 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15751 "00011100" // /* MW 3 */
+ 15752 "11000110" // /* MW 2 */
+ 15753 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15755 "00011100" // /* MW 3 */
+ 15756 "11000110" // /* MW 2 */
+ 15757 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15759 "00011100" // /* MW 3 */
+ 15760 "11000110" // /* MW 2 */
+ 15761 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15763 "00011100" // /* MW 3 */
+ 15764 "11000110" // /* MW 2 */
+ 15765 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15767 "00011100" // /* MW 3 */
+ 15768 "11000110" // /* MW 2 */
+ 15769 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15771 "00011100" // /* MW 3 */
+ 15772 "11000110" // /* MW 2 */
+ 15773 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15775 "00011100" // /* MW 3 */
+ 15776 "11000110" // /* MW 2 */
+ 15777 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+ 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15779 "00011100" // /* MW 3 */
+ 15780 "11000110" // /* MW 2 */
+ 15781 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 119 first
+ 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */
+ 15783 "00000000" // /* MW 3 */
+ 15784 "00101000" // /* MW 2 */
+ 15785 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19 first
+.delay_slot
+ 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15787 "00011100" // /* MW 3 */
+ 15788 "11000110" // /* MW 2 */
+ 15789 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15791 "00011100" // /* MW 3 */
+ 15792 "11000110" // /* MW 2 */
+ 15793 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15795 "00011100" // /* MW 3 */
+ 15796 "11000110" // /* MW 2 */
+ 15797 "00010000" // /* MW 1 */
+.src_ref 10 "me_div.c" 108 19
+.delay_slot
+ 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15799 "00011100" // /* MW 3 */
+ 15800 "11000110" // /* MW 2 */
+ 15801 "00010000" // /* MW 1 */
+.delay_slot
+ 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */
+ 15803 "10100000" // /* MW 3 */
+ 15804 "10011111" // /* MW 2 */
+.label _ZN12me_primitive10udiv_dstepEjjRjS0___end
+ 15805 "00011000" // /* MW 1 */
+.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src"
+.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer"
+.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv"
+.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common"
+.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2"
+.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p"
+.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib"
+.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend"
+.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc"
+.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail"
+.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib"
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eaa1644fb33f11a55e17a2e7f02cedec89cc05c6
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/Release/3_3_reloadable15.txt
@@ -0,0 +1,5263 @@
+Contents of the .debug_line section:
+
+sigmoid_carf_templated_lut.h:
+File name Line number Starting address View Stmt
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 205 0x2580 x
+elementwise_binary_shared.h 211 0x2580 1 x
+elementwise_binary_shared.h 216 0x2580 2
+elementwise_binary_shared.h 216 0x2580 3
+elementwise_binary_shared.h 216 0x258a
+elementwise_binary_shared.h 211 0x2598 x
+elementwise_binary_shared.h 212 0x259c x
+elementwise_binary_shared.h 212 0x25ac
+elementwise_binary_shared.h 213 0x25b0 x
+elementwise_binary_shared.h 213 0x25c0
+elementwise_binary_shared.h 214 0x25c4 x
+elementwise_binary_shared.h 214 0x25d4
+elementwise_binary_shared.h 216 0x25d8 x
+elementwise_binary_shared.h 217 0x25dc x
+elementwise_binary_shared.h 216 0x25e0
+elementwise_binary_shared.h 216 0x25e6 x
+elementwise_binary_shared.h 216 0x25ea
+elementwise_binary_shared.h 216 0x25ee
+elementwise_binary_shared.h 107 0x2650 x
+elementwise_binary_shared.h 119 0x2650 1
+elementwise_binary_shared.h 126 0x2650 2
+elementwise_binary_shared.h 131 0x2650 3
+elementwise_binary_shared.h 119 0x2654 x
+elementwise_binary_shared.h 122 0x2658 x
+elementwise_binary_shared.h 124 0x265c x
+elementwise_binary_shared.h 124 0x2668
+elementwise_binary_shared.h 107 0x266c
+elementwise_binary_shared.h 124 0x2672
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2676
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 124 0x2676 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 65 0x2680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 150 0x268c
+elementwise_binary_shared.h 119 0x2692 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2696 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x2696 1
+elementwise_binary_shared.h 126 0x2696 2
+elementwise_binary_shared.h 131 0x2696 3
+elementwise_binary_shared.h 131 0x2696 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26a0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 126 0x26a0 1 x
+elementwise_binary_shared.h 131 0x26a0 2 x
+elementwise_binary_shared.h 171 0x26a0 3
+elementwise_binary_shared.h 131 0x26b2
+elementwise_binary_shared.h 131 0x26b2 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26b8 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x26b8 2
+elementwise_binary_shared.h 166 0x26bc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26c8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26c8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x26da x
+vector.hpp 1139 0x26e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26e4
+vector.hpp 1159 0x26e4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x26e4 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x26f6
+vector.hpp 1139 0x26f6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x26f6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x26f6 3
+elementwise_binary_shared.h 173 0x26f6 4
+elementwise_binary_shared.h 150 0x2710
+elementwise_binary_shared.h 150 0x2714 x
+elementwise_binary_shared.h 150 0x2718
+elementwise_binary_shared.h 150 0x271e
+elementwise_binary_shared.h 150 0x2724
+elementwise_binary_shared.h 166 0x2724 1
+elementwise_binary_shared.h 150 0x2730
+elementwise_binary_shared.h 150 0x2740
+elementwise_binary_shared.h 150 0x2740 1
+elementwise_binary_shared.h 150 0x2740 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x274a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x274a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x274e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x274e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2752
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 171 0x2752 1
+elementwise_binary_shared.h 150 0x2758
+elementwise_binary_shared.h 150 0x275c
+elementwise_binary_shared.h 150 0x275c 1
+elementwise_binary_shared.h 150 0x2762
+elementwise_binary_shared.h 150 0x2766
+elementwise_binary_shared.h 150 0x276c
+elementwise_binary_shared.h 150 0x2774
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x2784 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x278a x
+vector.hpp 1139 0x2790 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x2790 1 x
+elementwise_binary_shared.h 166 0x2790 2 x
+elementwise_binary_shared.h 169 0x2790 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x279c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x279c 1
+elementwise_binary_shared.h 166 0x279c 2
+elementwise_binary_shared.h 171 0x279c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27a8 x
+vector.hpp 1139 0x27a8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27a8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27a8 3 x
+elementwise_binary_shared.h 173 0x27a8 4 x
+elementwise_binary_shared.h 177 0x27a8 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 166 0x27b0 1 x
+elementwise_binary_shared.h 171 0x27b0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27b8 2 x
+elementwise_binary_shared.h 166 0x27be x
+elementwise_binary_shared.h 166 0x27c2
+elementwise_binary_shared.h 177 0x27c2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27ca x
+vector.hpp 1139 0x27ca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x27ca 2 x
+elementwise_binary_shared.h 171 0x27ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x27d0
+vector.hpp 1159 0x27d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x27d0 2 x
+accum.hpp 1110 0x27d0 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x27d0 4 x
+elementwise_binary_shared.h 185 0x27d0 5
+elementwise_binary_shared.h 177 0x27f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2800 x
+vector.hpp 1139 0x2800 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 169 0x2800 2 x
+elementwise_binary_shared.h 171 0x2800 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2810
+vector.hpp 1159 0x2810 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x2810 2 x
+accum.hpp 1110 0x2810 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 173 0x2810 4 x
+elementwise_binary_shared.h 185 0x2810 5 x
+elementwise_binary_shared.h 177 0x2830 x
+elementwise_binary_shared.h 187 0x2840 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2846 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2846 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2846 2 x
+elementwise_binary_shared.h 177 0x284c x
+elementwise_binary_shared.h 187 0x2852 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2856 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2856 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2856 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2860
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2860 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 185 0x2860 2
+elementwise_binary_shared.h 205 0x2b00 x
+elementwise_binary_shared.h 211 0x2b00 1 x
+elementwise_binary_shared.h 216 0x2b00 2
+elementwise_binary_shared.h 216 0x2b00 3
+elementwise_binary_shared.h 216 0x2b0a
+elementwise_binary_shared.h 211 0x2b18 x
+elementwise_binary_shared.h 212 0x2b1c x
+elementwise_binary_shared.h 212 0x2b2c
+elementwise_binary_shared.h 213 0x2b30 x
+elementwise_binary_shared.h 213 0x2b40
+elementwise_binary_shared.h 214 0x2b44 x
+elementwise_binary_shared.h 214 0x2b54
+elementwise_binary_shared.h 216 0x2b58 x
+elementwise_binary_shared.h 217 0x2b5c x
+elementwise_binary_shared.h 216 0x2b60
+elementwise_binary_shared.h 216 0x2b66 x
+elementwise_binary_shared.h 216 0x2b6a
+elementwise_binary_shared.h 216 0x2b6e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 199 0x32e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32e4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x32e4 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x32ea
+io_buffer_main.h 125 0x32ea 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x32f0 x
+conv2d_dw_bf16.h 221 0x32f4 x
+conv2d_dw_bf16.h 221 0x32f8
+conv2d_dw_bf16.h 221 0x32fc
+conv2d_dw_bf16.h 221 0x3300
+conv2d_dw_bf16.h 221 0x3304
+conv2d_dw_bf16.h 222 0x3308 x
+conv2d_dw_bf16.h 222 0x330c
+conv2d_dw_bf16.h 222 0x3310
+conv2d_dw_bf16.h 222 0x3314
+conv2d_dw_bf16.h 222 0x3318
+conv2d_dw_bf16.h 223 0x331c x
+conv2d_dw_bf16.h 223 0x3320
+conv2d_dw_bf16.h 223 0x3324
+conv2d_dw_bf16.h 223 0x3328
+conv2d_dw_bf16.h 223 0x332c
+conv2d_dw_bf16.h 224 0x3330 x
+conv2d_dw_bf16.h 224 0x3334
+conv2d_dw_bf16.h 224 0x3338
+conv2d_dw_bf16.h 244 0x3338 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3342
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3342 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x3342 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3342 3 x
+conv2d_dw_bf16.h 225 0x3348
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x334c
+aie_core.h 81 0x334c 1
+aie_core.h 100 0x334c 2
+aie_core.h 100 0x334c 3
+aie_core.h 100 0x334c 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x334c 5
+vector.hpp 1139 0x334c 6
+vector.hpp 1139 0x334c 7 x
+vector.hpp 1139 0x334c 8 x
+vector.hpp 1159 0x334c 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x334c 10 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x334c 11
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3358
+aie_core.h 81 0x3358 1
+aie_core.h 100 0x3358 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3358 3
+vector.hpp 1139 0x3358 4
+vector.hpp 1159 0x3358 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3358 6 x
+conv2d_dw_bf16.h 225 0x3358 7 x
+conv2d_dw_bf16.h 244 0x3358 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3366
+aie_core.h 100 0x3366 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3366 2
+vector.hpp 1159 0x3366 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3366 4
+conv2d_dw_bf16.h 225 0x3366 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3370
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3370 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 204 0x3370 2
+conv2d_dw_bf16.h 225 0x3370 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x337a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x337a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x337a 2
+conv2d_dw_bf16.h 244 0x337a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3384
+shuffle.hpp 142 0x3384 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3384 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x338a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x338a 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x338a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3396
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3396 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3396 2 x
+conv2d_dw_bf16.h 250 0x3396 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33a2 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33a2 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x33a8
+conv2d_dw_bf16.h 244 0x33ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33b6
+shuffle.hpp 142 0x33b6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x33b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33c0
+shuffle.hpp 142 0x33c0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x33c0 2
+conv2d_dw_bf16.h 271 0x33c0 3
+conv2d_dw_bf16.h 272 0x33c0 4
+conv2d_dw_bf16.h 273 0x33c0 5
+conv2d_dw_bf16.h 274 0x33c0 6
+conv2d_dw_bf16.h 275 0x33c0 7
+conv2d_dw_bf16.h 276 0x33c0 8
+conv2d_dw_bf16.h 277 0x33c0 9
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33d0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x33d0 1
+accum.hpp 1110 0x33d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 265 0x33d0 3 x
+conv2d_dw_bf16.h 270 0x33d0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x33e0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x33e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x33e0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x33e0 3 x
+conv2d_dw_bf16.h 274 0x33e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x33f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33f0 1 x
+vector.hpp 1139 0x33f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x33f0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x33fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x33fa 1 x
+conv2d_dw_bf16.h 271 0x33fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3404 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3404 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3404 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3404 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x340e
+shuffle.hpp 142 0x3412
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3412 1 x
+conv2d_dw_bf16.h 267 0x341a x
+conv2d_dw_bf16.h 276 0x341a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3422 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3426 x
+conv2d_dw_bf16.h 273 0x3426 1 x
+conv2d_dw_bf16.h 265 0x342e x
+conv2d_dw_bf16.h 277 0x342e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3436 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x3440 x
+conv2d_dw_bf16.h 274 0x3450 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3460 x
+aie_core.h 100 0x3460 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3460 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x346a x
+conv2d_dw_bf16.h 271 0x346a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3472 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3472 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x347a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x347e x
+conv2d_dw_bf16.h 272 0x347e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3486 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3486 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3490 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3490 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3490 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x3496 x
+conv2d_dw_bf16.h 273 0x3496 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x34a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 250 0x34a0 2
+conv2d_dw_bf16.h 277 0x34a0 3 x
+conv2d_dw_bf16.h 250 0x34ac x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34b0 x
+vector.hpp 1139 0x34b4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x34b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34b8 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x34bc x
+accum.hpp 1110 0x34c0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x34c4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x34c8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x34cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x34cc 2 x
+conv2d_dw_bf16.h 268 0x34d4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x34d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x34d8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x34d8 2
+conv2d_dw_bf16.h 265 0x34e0 x
+conv2d_dw_bf16.h 270 0x34e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x34e8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 274 0x34e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x34f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34f0 1 x
+vector.hpp 1139 0x34f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x34f0 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x34fa
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 266 0x34fa 1 x
+conv2d_dw_bf16.h 271 0x34fa 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x3504 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x3504 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3504 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3504 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x350e
+shuffle.hpp 142 0x3512
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 272 0x3512 1 x
+conv2d_dw_bf16.h 267 0x351a x
+conv2d_dw_bf16.h 276 0x351a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3522 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 268 0x3526 x
+conv2d_dw_bf16.h 273 0x3526 1 x
+conv2d_dw_bf16.h 265 0x352e x
+conv2d_dw_bf16.h 277 0x352e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3536 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 270 0x353c x
+conv2d_dw_bf16.h 274 0x3540 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 81 0x3550 x
+aie_core.h 100 0x3550 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3550 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 244 0x3550 3 x
+conv2d_dw_bf16.h 266 0x355c x
+conv2d_dw_bf16.h 271 0x355c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3564 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 275 0x3564 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x356c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 267 0x3570 x
+conv2d_dw_bf16.h 272 0x3570 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3578 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 276 0x3578 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp:
+shuffle.hpp 142 0x3580
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 273 0x3584 x
+conv2d_dw_bf16.h 277 0x3588 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x3594 x
+accum.hpp 1110 0x3598
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 290 0x3598 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x359e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 286 0x35a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x35a6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x35aa x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h:
+conv2d_dw_bf16.h 285 0x35aa 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x35ae x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x35ae 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 432 0xac0 x
+conv2d_bf16_params.h 438 0xac0 1 x
+conv2d_bf16_params.h 452 0xac0 2
+conv2d_bf16_params.h 453 0xac0 3
+conv2d_bf16_params.h 458 0xac0 4
+conv2d_bf16_params.h 470 0xac0 5
+conv2d_bf16_params.h 438 0xaca
+conv2d_bf16_params.h 438 0xaca 1 x
+conv2d_bf16_params.h 452 0xaca 2
+conv2d_bf16_params.h 462 0xaca 3
+conv2d_bf16_params.h 432 0xad4
+conv2d_bf16_params.h 444 0xad4 1
+conv2d_bf16_params.h 453 0xade
+conv2d_bf16_params.h 458 0xade 1
+conv2d_bf16_params.h 458 0xade 2
+conv2d_bf16_params.h 444 0xaea
+conv2d_bf16_params.h 470 0xaea 1
+conv2d_bf16_params.h 477 0xaea 2
+conv2d_bf16_params.h 557 0xaea 3
+conv2d_bf16_params.h 452 0xaf6
+conv2d_bf16_params.h 458 0xaf6 1
+conv2d_bf16_params.h 462 0xaf6 2
+conv2d_bf16_params.h 438 0xafe
+conv2d_bf16_params.h 438 0xb02
+conv2d_bf16_params.h 438 0xb06
+conv2d_bf16_params.h 438 0xb0a
+conv2d_bf16_params.h 438 0xb18
+conv2d_bf16_params.h 438 0xb1c
+conv2d_bf16_params.h 438 0xb20
+conv2d_bf16_params.h 438 0xb24
+conv2d_bf16_params.h 438 0xb32
+conv2d_bf16_params.h 438 0xb36
+conv2d_bf16_params.h 438 0xb3a
+conv2d_bf16_params.h 438 0xb3e
+conv2d_bf16_params.h 438 0xb4c
+conv2d_bf16_params.h 438 0xb50
+conv2d_bf16_params.h 444 0xb54 x
+conv2d_bf16_params.h 447 0xb58 x
+conv2d_bf16_params.h 448 0xb5c x
+conv2d_bf16_params.h 452 0xb60 x
+conv2d_bf16_params.h 453 0xb64 x
+conv2d_bf16_params.h 458 0xb68 x
+conv2d_bf16_params.h 444 0xb6e x
+conv2d_bf16_params.h 458 0xb72 x
+conv2d_bf16_params.h 462 0xb72 1 x
+conv2d_bf16_params.h 462 0xb78
+conv2d_bf16_params.h 452 0xb7c x
+conv2d_bf16_params.h 452 0xb80
+conv2d_bf16_params.h 462 0xb80 1 x
+conv2d_bf16_params.h 557 0xb80 2
+conv2d_bf16_params.h 462 0xb86
+conv2d_bf16_params.h 458 0xb8a x
+conv2d_bf16_params.h 458 0xb8e
+conv2d_bf16_params.h 458 0xb92
+conv2d_bf16_params.h 477 0xb92 1
+conv2d_bf16_params.h 557 0xb92 2 x
+conv2d_bf16_params.h 458 0xb98 x
+conv2d_bf16_params.h 458 0xb9e
+conv2d_bf16_params.h 477 0xb9e 1 x
+conv2d_bf16_params.h 458 0xba4 x
+conv2d_bf16_params.h 444 0xba8 x
+conv2d_bf16_params.h 462 0xbac x
+conv2d_bf16_params.h 470 0xbb0 x
+conv2d_bf16_params.h 470 0xbb4
+conv2d_bf16_params.h 477 0xbb4 1 x
+conv2d_bf16_params.h 477 0xbb8
+conv2d_bf16_params.h 491 0xbc8
+conv2d_bf16_params.h 492 0xbc8 1
+conv2d_bf16_params.h 495 0xbc8 2
+conv2d_bf16_params.h 502 0xbc8 3
+conv2d_bf16_params.h 533 0xbc8 4
+conv2d_bf16_params.h 539 0xbc8 5
+conv2d_bf16_params.h 557 0xbc8 6
+conv2d_bf16_params.h 621 0xbc8 7
+conv2d_bf16_params.h 645 0xbc8 8
+conv2d_bf16_params.h 709 0xbc8 9
+conv2d_bf16_params.h 477 0xbd2
+conv2d_bf16_params.h 481 0xbd2 1
+conv2d_bf16_params.h 500 0xbd2 2
+conv2d_bf16_params.h 506 0xbd2 3
+conv2d_bf16_params.h 507 0xbd2 4
+conv2d_bf16_params.h 524 0xbd2 5
+conv2d_bf16_params.h 539 0xbd2 6
+conv2d_bf16_params.h 655 0xbd2 7
+conv2d_bf16_params.h 477 0xbdc
+conv2d_bf16_params.h 504 0xbdc 1
+conv2d_bf16_params.h 510 0xbdc 2
+conv2d_bf16_params.h 520 0xbdc 3
+conv2d_bf16_params.h 700 0xbdc 4
+conv2d_bf16_params.h 477 0xbe2
+conv2d_bf16_params.h 539 0xbe2 1
+conv2d_bf16_params.h 578 0xbe2 2
+conv2d_bf16_params.h 642 0xbe2 3
+conv2d_bf16_params.h 529 0xbe6
+conv2d_bf16_params.h 642 0xbe6 1
+conv2d_bf16_params.h 642 0xbe6 2
+conv2d_bf16_params.h 655 0xbea
+conv2d_bf16_params.h 453 0xbf0
+conv2d_bf16_params.h 453 0xbf0 1
+conv2d_bf16_params.h 477 0xbf0 2
+conv2d_bf16_params.h 504 0xbf0 3
+conv2d_bf16_params.h 655 0xbf0 4
+conv2d_bf16_params.h 453 0xbfc x
+conv2d_bf16_params.h 477 0xbfc 1
+conv2d_bf16_params.h 481 0xbfc 2
+conv2d_bf16_params.h 500 0xbfc 3
+conv2d_bf16_params.h 506 0xbfc 4
+conv2d_bf16_params.h 507 0xbfc 5
+conv2d_bf16_params.h 524 0xbfc 6
+conv2d_bf16_params.h 539 0xbfc 7
+conv2d_bf16_params.h 491 0xc06
+conv2d_bf16_params.h 492 0xc06 1
+conv2d_bf16_params.h 495 0xc06 2
+conv2d_bf16_params.h 502 0xc06 3
+conv2d_bf16_params.h 510 0xc06 4
+conv2d_bf16_params.h 520 0xc06 5
+conv2d_bf16_params.h 533 0xc06 6
+conv2d_bf16_params.h 539 0xc06 7
+conv2d_bf16_params.h 557 0xc06 8
+conv2d_bf16_params.h 621 0xc06 9
+conv2d_bf16_params.h 645 0xc06 10
+conv2d_bf16_params.h 655 0xc06 11
+conv2d_bf16_params.h 700 0xc06 12
+conv2d_bf16_params.h 709 0xc06 13
+conv2d_bf16_params.h 477 0xc10
+conv2d_bf16_params.h 529 0xc10 1
+conv2d_bf16_params.h 539 0xc10 2
+conv2d_bf16_params.h 578 0xc10 3
+conv2d_bf16_params.h 642 0xc10 4
+conv2d_bf16_params.h 642 0xc10 5
+conv2d_bf16_params.h 642 0xc10 6
+conv2d_bf16_params.h 477 0xc20 x
+conv2d_bf16_params.h 495 0xc20 1 x
+conv2d_bf16_params.h 495 0xc20 2
+conv2d_bf16_params.h 682 0xc20 3
+conv2d_bf16_params.h 477 0xc2a
+conv2d_bf16_params.h 481 0xc2a 1 x
+conv2d_bf16_params.h 495 0xc2a 2
+conv2d_bf16_params.h 495 0xc2a 3
+conv2d_bf16_params.h 477 0xc34 x
+conv2d_bf16_params.h 496 0xc34 1
+conv2d_bf16_params.h 504 0xc34 2
+conv2d_bf16_params.h 539 0xc34 3
+conv2d_bf16_params.h 578 0xc34 4
+conv2d_bf16_params.h 496 0xc3e
+conv2d_bf16_params.h 499 0xc3e 1
+conv2d_bf16_params.h 504 0xc3e 2 x
+conv2d_bf16_params.h 509 0xc3e 3
+conv2d_bf16_params.h 519 0xc3e 4
+conv2d_bf16_params.h 700 0xc3e 5
+conv2d_bf16_params.h 492 0xc48 x
+conv2d_bf16_params.h 497 0xc48 1
+conv2d_bf16_params.h 509 0xc48 2
+conv2d_bf16_params.h 500 0xc52
+conv2d_bf16_params.h 520 0xc52 1 x
+conv2d_bf16_params.h 502 0xc58
+conv2d_bf16_params.h 520 0xc58 1
+conv2d_bf16_params.h 502 0xc62
+conv2d_bf16_params.h 507 0xc62 1 x
+conv2d_bf16_params.h 495 0xc68 x
+conv2d_bf16_params.h 495 0xc6c
+conv2d_bf16_params.h 495 0xc6c 1
+conv2d_bf16_params.h 610 0xc6c 2
+conv2d_bf16_params.h 709 0xc6c 3
+conv2d_bf16_params.h 507 0xc72 x
+conv2d_bf16_params.h 495 0xc76 x
+conv2d_bf16_params.h 495 0xc7a
+conv2d_bf16_params.h 506 0xc7a 1
+conv2d_bf16_params.h 519 0xc7a 2 x
+conv2d_bf16_params.h 496 0xc84 x
+conv2d_bf16_params.h 504 0xc84 1 x
+conv2d_bf16_params.h 522 0xc84 2
+conv2d_bf16_params.h 509 0xc8e x
+conv2d_bf16_params.h 496 0xc94 x
+conv2d_bf16_params.h 520 0xc94 1 x
+conv2d_bf16_params.h 529 0xc94 2
+conv2d_bf16_params.h 497 0xc9e x
+conv2d_bf16_params.h 509 0xc9e 1 x
+conv2d_bf16_params.h 533 0xc9e 2
+conv2d_bf16_params.h 539 0xca8 x
+conv2d_bf16_params.h 499 0xcac x
+conv2d_bf16_params.h 499 0xcb0
+conv2d_bf16_params.h 529 0xcb4 x
+conv2d_bf16_params.h 507 0xcb8 x
+conv2d_bf16_params.h 511 0xcb8 1
+conv2d_bf16_params.h 491 0xcbe x
+conv2d_bf16_params.h 507 0xcbe 1
+conv2d_bf16_params.h 500 0xcc8 x
+conv2d_bf16_params.h 511 0xcc8 1 x
+conv2d_bf16_params.h 500 0xcce
+conv2d_bf16_params.h 534 0xcce 1
+conv2d_bf16_params.h 502 0xcd6 x
+conv2d_bf16_params.h 509 0xcd6 1 x
+conv2d_bf16_params.h 642 0xcd6 2
+conv2d_bf16_params.h 510 0xce2 x
+conv2d_bf16_params.h 506 0xce6 x
+conv2d_bf16_params.h 527 0xcea x
+conv2d_bf16_params.h 502 0xcf4 x
+conv2d_bf16_params.h 502 0xcf8
+conv2d_bf16_params.h 506 0xcfc x
+conv2d_bf16_params.h 506 0xd0c
+conv2d_bf16_params.h 506 0xd10
+conv2d_bf16_params.h 510 0xd14 x
+conv2d_bf16_params.h 510 0xd18
+conv2d_bf16_params.h 510 0xd1e
+conv2d_bf16_params.h 510 0xd22
+conv2d_bf16_params.h 510 0xd28
+conv2d_bf16_params.h 539 0xd28 1
+conv2d_bf16_params.h 642 0xd28 2
+conv2d_bf16_params.h 511 0xd2e x
+conv2d_bf16_params.h 524 0xd2e 1
+conv2d_bf16_params.h 539 0xd2e 2
+conv2d_bf16_params.h 512 0xd34 x
+conv2d_bf16_params.h 524 0xd34 1 x
+conv2d_bf16_params.h 524 0xd3a
+conv2d_bf16_params.h 524 0xd3e
+conv2d_bf16_params.h 520 0xd42 x
+conv2d_bf16_params.h 511 0xd46 x
+conv2d_bf16_params.h 522 0xd46 1 x
+conv2d_bf16_params.h 524 0xd4c x
+conv2d_bf16_params.h 529 0xd4c 1 x
+conv2d_bf16_params.h 539 0xd4c 2 x
+conv2d_bf16_params.h 534 0xd56
+conv2d_bf16_params.h 539 0xd56 1
+conv2d_bf16_params.h 527 0xd5c x
+conv2d_bf16_params.h 533 0xd5c 1 x
+conv2d_bf16_params.h 529 0xd6a x
+conv2d_bf16_params.h 533 0xd6a 1
+conv2d_bf16_params.h 539 0xd70 x
+conv2d_bf16_params.h 529 0xd76 x
+conv2d_bf16_params.h 529 0xd76 1
+conv2d_bf16_params.h 529 0xd7c
+conv2d_bf16_params.h 534 0xd80 x
+conv2d_bf16_params.h 534 0xd84
+conv2d_bf16_params.h 539 0xd84 1 x
+conv2d_bf16_params.h 555 0xd84 2
+conv2d_bf16_params.h 559 0xd84 3
+conv2d_bf16_params.h 700 0xd84 4
+conv2d_bf16_params.h 669 0xd8e
+conv2d_bf16_params.h 700 0xd8e 1
+conv2d_bf16_params.h 539 0xd92
+conv2d_bf16_params.h 539 0xda2
+conv2d_bf16_params.h 539 0xdb2
+conv2d_bf16_params.h 539 0xdb2 1
+conv2d_bf16_params.h 539 0xdb2 2
+conv2d_bf16_params.h 539 0xdb2 3
+conv2d_bf16_params.h 539 0xdbc
+conv2d_bf16_params.h 539 0xdc0
+conv2d_bf16_params.h 539 0xdc4
+conv2d_bf16_params.h 539 0xdc4 1
+conv2d_bf16_params.h 539 0xdca
+conv2d_bf16_params.h 539 0xdce
+conv2d_bf16_params.h 539 0xdd2
+conv2d_bf16_params.h 669 0xdd2 1
+conv2d_bf16_params.h 539 0xdd8
+conv2d_bf16_params.h 539 0xddc
+conv2d_bf16_params.h 539 0xde0
+conv2d_bf16_params.h 539 0xde4
+conv2d_bf16_params.h 555 0xde8 x
+conv2d_bf16_params.h 642 0xdf0
+conv2d_bf16_params.h 669 0xdf0 1
+conv2d_bf16_params.h 669 0xdf0 2
+conv2d_bf16_params.h 669 0xdfa x
+conv2d_bf16_params.h 497 0xdfe x
+conv2d_bf16_params.h 641 0xdfe 1 x
+conv2d_bf16_params.h 645 0xdfe 2
+conv2d_bf16_params.h 559 0xe08 x
+conv2d_bf16_params.h 640 0xe08 1
+conv2d_bf16_params.h 642 0xe08 2
+conv2d_bf16_params.h 642 0xe08 3
+conv2d_bf16_params.h 642 0xe12 x
+conv2d_bf16_params.h 578 0xe16 x
+conv2d_bf16_params.h 640 0xe1a x
+conv2d_bf16_params.h 557 0xe1e
+conv2d_bf16_params.h 645 0xe1e 1
+conv2d_bf16_params.h 641 0xe28 x
+conv2d_bf16_params.h 642 0xe28 1 x
+conv2d_bf16_params.h 642 0xe2e
+conv2d_bf16_params.h 642 0xe2e 1
+conv2d_bf16_params.h 558 0xe32 x
+conv2d_bf16_params.h 645 0xe32 1
+conv2d_bf16_params.h 540 0xe38
+conv2d_bf16_params.h 645 0xe38 1 x
+conv2d_bf16_params.h 540 0xe3e x
+conv2d_bf16_params.h 557 0xe3e 1
+conv2d_bf16_params.h 642 0xe44 x
+conv2d_bf16_params.h 557 0xe48 x
+conv2d_bf16_params.h 655 0xe48 1
+conv2d_bf16_params.h 558 0xe4e
+conv2d_bf16_params.h 655 0xe4e 1 x
+conv2d_bf16_params.h 558 0xe54 x
+conv2d_bf16_params.h 540 0xe58 x
+conv2d_bf16_params.h 655 0xe58 1
+conv2d_bf16_params.h 655 0xe58 2
+conv2d_bf16_params.h 679 0xe58 3
+conv2d_bf16_params.h 655 0xe62 x
+conv2d_bf16_params.h 558 0xe66 x
+conv2d_bf16_params.h 655 0xe66 1
+conv2d_bf16_params.h 655 0xe66 2
+conv2d_bf16_params.h 679 0xe66 3
+conv2d_bf16_params.h 655 0xe70 x
+conv2d_bf16_params.h 126 0xe74 x
+conv2d_bf16_params.h 559 0xe74 1 x
+conv2d_bf16_params.h 669 0xe7a x
+conv2d_bf16_params.h 700 0xe7a 1
+conv2d_bf16_params.h 558 0xe80 x
+conv2d_bf16_params.h 700 0xe86 x
+conv2d_bf16_params.h 578 0xe8a x
+conv2d_bf16_params.h 559 0xe8e x
+conv2d_bf16_params.h 578 0xe92 x
+conv2d_bf16_params.h 610 0xe96 x
+conv2d_bf16_params.h 611 0xe96 1
+conv2d_bf16_params.h 621 0xe96 2
+conv2d_bf16_params.h 621 0xe96 3
+conv2d_bf16_params.h 629 0xe96 4
+conv2d_bf16_params.h 621 0xea2
+conv2d_bf16_params.h 621 0xea2 1 x
+conv2d_bf16_params.h 645 0xea2 2
+conv2d_bf16_params.h 649 0xea2 3
+conv2d_bf16_params.h 645 0xea8
+conv2d_bf16_params.h 554 0xeae x
+conv2d_bf16_params.h 645 0xeae 1 x
+conv2d_bf16_params.h 554 0xeb8
+conv2d_bf16_params.h 555 0xeb8 1
+conv2d_bf16_params.h 555 0xeb8 2 x
+conv2d_bf16_params.h 645 0xeb8 3
+conv2d_bf16_params.h 555 0xec4
+conv2d_bf16_params.h 621 0xec4 1
+conv2d_bf16_params.h 621 0xec4 2 x
+conv2d_bf16_params.h 645 0xec4 3
+conv2d_bf16_params.h 558 0xece x
+conv2d_bf16_params.h 559 0xece 1
+conv2d_bf16_params.h 621 0xece 2
+conv2d_bf16_params.h 621 0xece 3
+conv2d_bf16_params.h 645 0xece 4
+conv2d_bf16_params.h 559 0xeda x
+conv2d_bf16_params.h 621 0xeda 1 x
+conv2d_bf16_params.h 645 0xeda 2 x
+conv2d_bf16_params.h 610 0xee0 x
+conv2d_bf16_params.h 621 0xee0 1
+conv2d_bf16_params.h 655 0xee0 2
+conv2d_bf16_params.h 679 0xee0 3
+conv2d_bf16_params.h 621 0xeec
+conv2d_bf16_params.h 649 0xeec 1
+conv2d_bf16_params.h 655 0xeec 2 x
+conv2d_bf16_params.h 661 0xeec 3
+conv2d_bf16_params.h 127 0xef6 x
+conv2d_bf16_params.h 127 0xef6 1 x
+conv2d_bf16_params.h 621 0xef6 2
+conv2d_bf16_params.h 649 0xef6 3
+conv2d_bf16_params.h 655 0xef6 4
+conv2d_bf16_params.h 679 0xef6 5
+conv2d_bf16_params.h 710 0xef6 6
+conv2d_bf16_params.h 710 0xef6 7
+conv2d_bf16_params.h 655 0xf00 x
+conv2d_bf16_params.h 679 0xf00 1 x
+conv2d_bf16_params.h 621 0xf06 x
+conv2d_bf16_params.h 649 0xf06 1 x
+conv2d_bf16_params.h 655 0xf06 2
+conv2d_bf16_params.h 655 0xf06 3
+conv2d_bf16_params.h 700 0xf06 4
+conv2d_bf16_params.h 700 0xf06 5
+conv2d_bf16_params.h 655 0xf10 x
+conv2d_bf16_params.h 700 0xf10 1 x
+conv2d_bf16_params.h 629 0xf14 x
+conv2d_bf16_params.h 611 0xf18 x
+conv2d_bf16_params.h 643 0xf26 x
+conv2d_bf16_params.h 664 0xf2a
+conv2d_bf16_params.h 621 0xf30 x
+conv2d_bf16_params.h 629 0xf30 1
+conv2d_bf16_params.h 684 0xf30 2 x
+conv2d_bf16_params.h 629 0xf3a x
+conv2d_bf16_params.h 127 0xf40 x
+conv2d_bf16_params.h 644 0xf40 1
+conv2d_bf16_params.h 700 0xf40 2 x
+conv2d_bf16_params.h 705 0xf40 3
+conv2d_bf16_params.h 705 0xf40 4
+conv2d_bf16_params.h 645 0xf4a x
+conv2d_bf16_params.h 700 0xf4a 1
+conv2d_bf16_params.h 700 0xf4a 2
+conv2d_bf16_params.h 705 0xf4a 3
+conv2d_bf16_params.h 644 0xf54
+conv2d_bf16_params.h 649 0xf54 1 x
+conv2d_bf16_params.h 674 0xf54 2
+conv2d_bf16_params.h 644 0xf5e x
+conv2d_bf16_params.h 662 0xf5e 1
+conv2d_bf16_params.h 664 0xf5e 2 x
+conv2d_bf16_params.h 127 0xf68 x
+conv2d_bf16_params.h 663 0xf68 1 x
+conv2d_bf16_params.h 664 0xf68 2
+conv2d_bf16_params.h 126 0xf6e x
+conv2d_bf16_params.h 664 0xf6e 1 x
+conv2d_bf16_params.h 126 0xf74
+conv2d_bf16_params.h 664 0xf74 1
+conv2d_bf16_params.h 127 0xf7a x
+conv2d_bf16_params.h 127 0xf7a 1 x
+conv2d_bf16_params.h 664 0xf7a 2
+conv2d_bf16_params.h 664 0xf7a 3
+conv2d_bf16_params.h 675 0xf7a 4
+conv2d_bf16_params.h 696 0xf7a 5
+conv2d_bf16_params.h 644 0xf84 x
+conv2d_bf16_params.h 664 0xf84 1 x
+conv2d_bf16_params.h 705 0xf84 2
+conv2d_bf16_params.h 664 0xf8e
+conv2d_bf16_params.h 705 0xf8e 1 x
+conv2d_bf16_params.h 705 0xf8e 2 x
+conv2d_bf16_params.h 127 0xf94
+conv2d_bf16_params.h 674 0xf94 1 x
+conv2d_bf16_params.h 675 0xf94 2 x
+conv2d_bf16_params.h 682 0xf94 3
+conv2d_bf16_params.h 718 0xf94 4
+conv2d_bf16_params.h 720 0xf94 5
+conv2d_bf16_params.h 127 0xf9e x
+conv2d_bf16_params.h 642 0xf9e 1
+conv2d_bf16_params.h 675 0xf9e 2
+conv2d_bf16_params.h 675 0xfa8 x
+conv2d_bf16_params.h 707 0xfa8 1 x
+conv2d_bf16_params.h 642 0xfae
+conv2d_bf16_params.h 674 0xfae 1 x
+conv2d_bf16_params.h 675 0xfae 2
+conv2d_bf16_params.h 642 0xfb8 x
+conv2d_bf16_params.h 655 0xfb8 1
+conv2d_bf16_params.h 655 0xfb8 2
+conv2d_bf16_params.h 675 0xfb8 3 x
+conv2d_bf16_params.h 679 0xfb8 4
+conv2d_bf16_params.h 679 0xfb8 5
+conv2d_bf16_params.h 655 0xfc4 x
+conv2d_bf16_params.h 679 0xfc4 1 x
+conv2d_bf16_params.h 713 0xfc4 2
+conv2d_bf16_params.h 691 0xfca x
+conv2d_bf16_params.h 675 0xfce
+conv2d_bf16_params.h 675 0xfce 1 x
+conv2d_bf16_params.h 709 0xfce 2 x
+conv2d_bf16_params.h 675 0xfd8
+conv2d_bf16_params.h 706 0xfd8 1 x
+conv2d_bf16_params.h 706 0xfd8 2
+conv2d_bf16_params.h 709 0xfd8 3
+conv2d_bf16_params.h 682 0xfe4 x
+conv2d_bf16_params.h 706 0xfe4 1
+conv2d_bf16_params.h 126 0xfea x
+conv2d_bf16_params.h 696 0xfea 1 x
+conv2d_bf16_params.h 127 0xff0 x
+conv2d_bf16_params.h 127 0xff0 1 x
+conv2d_bf16_params.h 696 0xff0 2
+conv2d_bf16_params.h 696 0xff6 x
+conv2d_bf16_params.h 713 0xff6 1 x
+conv2d_bf16_params.h 696 0xffc
+conv2d_bf16_params.h 706 0xffc 1
+conv2d_bf16_params.h 706 0xffc 2 x
+conv2d_bf16_params.h 706 0x1006
+conv2d_bf16_params.h 696 0x100a x
+conv2d_bf16_params.h 707 0x100a 1 x
+conv2d_bf16_params.h 696 0x1010
+conv2d_bf16_params.h 709 0x1010 1 x
+conv2d_bf16_params.h 696 0x1016 x
+conv2d_bf16_params.h 709 0x1016 1
+conv2d_bf16_params.h 707 0x1020 x
+conv2d_bf16_params.h 708 0x1020 1
+conv2d_bf16_params.h 710 0x1020 2 x
+conv2d_bf16_params.h 710 0x1020 3 x
+conv2d_bf16_params.h 708 0x102c x
+conv2d_bf16_params.h 713 0x102c 1 x
+conv2d_bf16_params.h 709 0x1036 x
+conv2d_bf16_params.h 800 0x1036 1 x
+conv2d_bf16_params.h 710 0x103c x
+conv2d_bf16_params.h 718 0x1044 x
+conv2d_bf16_params.h 718 0x1048
+conv2d_bf16_params.h 720 0x104c x
+conv2d_bf16_params.h 800 0x104c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1060
+utils.h 531 0x1060 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 689 0x1060 2 x
+conv2d_bf16.h 698 0x1060 3
+conv2d_bf16.h 704 0x1060 4
+conv2d_bf16.h 707 0x1060 5
+conv2d_bf16.h 707 0x1060 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x106c
+utils.h 526 0x106c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 698 0x106c 2 x
+conv2d_bf16.h 704 0x106c 3 x
+conv2d_bf16.h 707 0x106c 4
+conv2d_bf16.h 707 0x106c 5
+conv2d_bf16.h 698 0x107a
+conv2d_bf16.h 702 0x107a 1
+conv2d_bf16.h 698 0x1084
+conv2d_bf16.h 702 0x1084 1 x
+conv2d_bf16.h 699 0x108e x
+conv2d_bf16.h 702 0x108e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1098
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 702 0x1098 1 x
+conv2d_bf16.h 702 0x109e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x10a6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10a6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 704 0x10b0 x
+conv2d_bf16.h 702 0x10b4 x
+conv2d_bf16.h 705 0x10b4 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10ba x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x10ba 1
+conv2d_bf16.h 707 0x10ba 2
+conv2d_bf16.h 704 0x10c0 x
+conv2d_bf16.h 705 0x10c6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10d0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x10d0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x10d0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x10e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x10e0 1 x
+conv2d_bf16.h 704 0x10f0 x
+conv2d_bf16.h 705 0x1100 x
+conv2d_bf16.h 707 0x1100 1 x
+conv2d_bf16.h 707 0x1100 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1110 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 526 0x1110 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1110 2
+conv2d_bf16.h 708 0x1110 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1120
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1120 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x1120 2 x
+conv2d_bf16.h 707 0x1132 x
+conv2d_bf16.h 707 0x1132 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1136 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1136 1 x
+conv2d_bf16.h 708 0x1136 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x113e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x113e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1142 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x1146 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 705 0x1146 1 x
+conv2d_bf16.h 707 0x1146 2 x
+conv2d_bf16.h 707 0x1146 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 350 0x114e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 706 0x114e 1 x
+conv2d_bf16.h 708 0x114e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1156 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 707 0x115a x
+conv2d_bf16.h 707 0x115a 1 x
+conv2d_bf16.h 723 0x115a 2 x
+conv2d_bf16.h 708 0x1160 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h:
+utils.h 531 0x1164 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1170
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x1170 1
+conv2d_bf16.h 1836 0x1170 2 x
+conv2d_bf16.h 1836 0x1170 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 240 0x1170 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1836 0x117e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 241 0x117e 1
+conv2d_bf16_params.h 242 0x117e 2
+conv2d_bf16_params.h 250 0x117e 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 866 0x118a
+conv2d_bf16.h 876 0x118a 1
+conv2d_bf16.h 876 0x118a 2
+conv2d_bf16.h 881 0x118a 3
+conv2d_bf16.h 1836 0x118a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 242 0x118a 5
+conv2d_bf16_params.h 242 0x118a 6
+conv2d_bf16_params.h 242 0x118a 7
+conv2d_bf16_params.h 242 0x118a 8
+conv2d_bf16_params.h 242 0x118a 9
+conv2d_bf16_params.h 243 0x118a 10
+conv2d_bf16_params.h 245 0x118a 11
+conv2d_bf16_params.h 250 0x118a 12
+conv2d_bf16_params.h 250 0x118a 13
+conv2d_bf16_params.h 240 0x1196
+conv2d_bf16_params.h 240 0x1196 1 x
+conv2d_bf16_params.h 242 0x11a2
+conv2d_bf16_params.h 245 0x11a2 1
+conv2d_bf16_params.h 242 0x11ae
+conv2d_bf16_params.h 244 0x11ae 1
+conv2d_bf16_params.h 244 0x11ae 2
+conv2d_bf16_params.h 249 0x11ae 3
+conv2d_bf16_params.h 243 0x11ba
+conv2d_bf16_params.h 244 0x11ba 1
+conv2d_bf16_params.h 250 0x11ba 2
+conv2d_bf16_params.h 244 0x11c6
+conv2d_bf16_params.h 240 0x11d4
+conv2d_bf16_params.h 240 0x11d8
+conv2d_bf16_params.h 241 0x11d8 1 x
+conv2d_bf16_params.h 242 0x11de x
+conv2d_bf16_params.h 242 0x11de 1 x
+conv2d_bf16_params.h 245 0x11e4 x
+conv2d_bf16_params.h 242 0x11f2 x
+conv2d_bf16_params.h 242 0x11f6
+conv2d_bf16_params.h 242 0x11fa
+conv2d_bf16_params.h 241 0x11fe x
+conv2d_bf16_params.h 242 0x11fe 1
+conv2d_bf16_params.h 242 0x1204 x
+conv2d_bf16_params.h 242 0x1208
+conv2d_bf16_params.h 242 0x120c
+conv2d_bf16_params.h 242 0x1210
+conv2d_bf16_params.h 242 0x1210 1
+conv2d_bf16_params.h 242 0x1216
+conv2d_bf16_params.h 243 0x121a x
+conv2d_bf16_params.h 242 0x121e x
+conv2d_bf16_params.h 243 0x121e 1
+conv2d_bf16_params.h 244 0x1224 x
+conv2d_bf16_params.h 245 0x1224 1 x
+conv2d_bf16_params.h 244 0x1236
+conv2d_bf16_params.h 244 0x1236 1
+conv2d_bf16_params.h 245 0x123c
+conv2d_bf16_params.h 244 0x1242
+conv2d_bf16_params.h 244 0x1246
+conv2d_bf16_params.h 244 0x124a
+conv2d_bf16_params.h 244 0x124e
+conv2d_bf16_params.h 244 0x1252
+conv2d_bf16_params.h 245 0x1256
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 876 0x1268
+conv2d_bf16.h 876 0x1268 1
+conv2d_bf16.h 1849 0x1276
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 250 0x1280 x
+conv2d_bf16_params.h 250 0x1280 1
+conv2d_bf16_params.h 250 0x128c
+conv2d_bf16_params.h 250 0x1290
+conv2d_bf16_params.h 250 0x1294
+conv2d_bf16_params.h 250 0x1298
+conv2d_bf16_params.h 250 0x1298 1
+conv2d_bf16_params.h 250 0x129e
+conv2d_bf16_params.h 249 0x12a2 x
+conv2d_bf16_params.h 249 0x12a6
+conv2d_bf16_params.h 250 0x12aa x
+conv2d_bf16_params.h 258 0x12b0 x
+conv2d_bf16_params.h 259 0x12c8
+conv2d_bf16_params.h 259 0x12ce x
+conv2d_bf16_params.h 259 0x12d2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x12e0 x
+conv2d_bf16.h 1849 0x12e0 1
+conv2d_bf16.h 1849 0x12e0 2 x
+conv2d_bf16.h 876 0x12ea
+conv2d_bf16.h 881 0x12ea 1
+conv2d_bf16.h 1841 0x12ea 2
+conv2d_bf16.h 1842 0x12ea 3
+conv2d_bf16.h 1842 0x12ea 4
+conv2d_bf16.h 1842 0x12ea 5
+conv2d_bf16.h 1845 0x12ea 6
+conv2d_bf16.h 1849 0x12ea 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x12ea 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x12f4 x
+conv2d_bf16.h 1842 0x12f4 1
+conv2d_bf16.h 1849 0x12f4 2
+conv2d_bf16.h 862 0x1300
+conv2d_bf16.h 1842 0x1300 1
+conv2d_bf16.h 1845 0x1300 2
+conv2d_bf16.h 1845 0x130c x
+conv2d_bf16.h 862 0x1310 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x1314 x
+io_buffer_main.h 125 0x1318
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1841 0x1318 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x131e x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1842 0x1322 x
+conv2d_bf16.h 1845 0x1328 x
+conv2d_bf16.h 866 0x132c x
+conv2d_bf16.h 866 0x1330
+conv2d_bf16.h 1842 0x1336 x
+conv2d_bf16.h 1842 0x1336 1 x
+conv2d_bf16.h 1842 0x133c
+conv2d_bf16.h 1845 0x133c 1 x
+conv2d_bf16.h 1841 0x1342 x
+conv2d_bf16.h 881 0x134a
+conv2d_bf16.h 885 0x134a 1
+conv2d_bf16.h 1845 0x134e x
+conv2d_bf16.h 867 0x1352
+conv2d_bf16.h 867 0x1358
+conv2d_bf16.h 867 0x1358 1 x
+conv2d_bf16.h 867 0x1360
+conv2d_bf16.h 867 0x1366
+conv2d_bf16.h 867 0x1372
+conv2d_bf16.h 867 0x1372 1
+conv2d_bf16.h 867 0x1378
+conv2d_bf16.h 867 0x137c
+conv2d_bf16.h 867 0x1382
+conv2d_bf16.h 867 0x138a
+conv2d_bf16.h 881 0x13a0
+conv2d_bf16.h 883 0x13a0 1
+conv2d_bf16.h 884 0x13a0 2
+conv2d_bf16.h 876 0x13ac x
+conv2d_bf16.h 876 0x13ac 1 x
+conv2d_bf16.h 881 0x13ac 2 x
+conv2d_bf16.h 883 0x13ac 3
+conv2d_bf16.h 884 0x13ac 4
+conv2d_bf16.h 885 0x13b8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13b8 1
+conv2d_bf16_params.h 243 0x13b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 883 0x13c2 x
+conv2d_bf16.h 884 0x13c8 x
+conv2d_bf16.h 876 0x13ce x
+conv2d_bf16.h 876 0x13d2
+conv2d_bf16.h 881 0x13d6 x
+conv2d_bf16.h 881 0x13da
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13da 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 881 0x13e0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h:
+conv2d_bf16_params.h 243 0x13e0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 885 0x13f0
+conv2d_bf16.h 885 0x13f4 x
+conv2d_bf16.h 885 0x13fe
+conv2d_bf16.h 885 0x1402
+conv2d_bf16.h 885 0x1406
+conv2d_bf16.h 896 0x1410
+conv2d_bf16.h 1115 0x1410 1
+conv2d_bf16.h 1115 0x1410 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x141a
+vector.hpp 1152 0x141a 1
+vector.hpp 1152 0x141a 2
+vector.hpp 1152 0x141a 3
+vector.hpp 1152 0x141a 4
+vector.hpp 1152 0x141a 5
+vector.hpp 1152 0x141a 6
+vector.hpp 1152 0x141a 7
+vector.hpp 1152 0x141a 8
+vector.hpp 1152 0x141a 9
+vector.hpp 1152 0x141a 10
+vector.hpp 1152 0x141a 11
+vector.hpp 1152 0x141a 12
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x141a 13
+accum.hpp 149 0x141a 14
+accum.hpp 149 0x141a 15
+accum.hpp 149 0x141a 16
+accum.hpp 149 0x141a 17
+accum.hpp 149 0x141a 18
+accum.hpp 149 0x141a 19
+accum.hpp 149 0x141a 20
+accum.hpp 149 0x141a 21
+accum.hpp 149 0x141a 22
+accum.hpp 149 0x141a 23
+accum.hpp 149 0x141a 24
+accum.hpp 149 0x141a 25
+accum.hpp 149 0x141a 26
+accum.hpp 149 0x141a 27
+accum.hpp 149 0x141a 28
+accum.hpp 1110 0x141a 29
+accum.hpp 1110 0x141a 30
+accum.hpp 1110 0x141a 31
+accum.hpp 1110 0x141a 32
+accum.hpp 1110 0x141a 33
+accum.hpp 1110 0x141a 34
+accum.hpp 1110 0x141a 35
+accum.hpp 1110 0x141a 36
+accum.hpp 1110 0x141a 37
+accum.hpp 1110 0x141a 38
+accum.hpp 1110 0x141a 39
+accum.hpp 1110 0x141a 40
+accum.hpp 1110 0x141a 41
+accum.hpp 1110 0x141a 42
+accum.hpp 1110 0x141a 43
+accum.hpp 1110 0x141a 44
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 886 0x141a 45
+conv2d_bf16.h 896 0x141a 46 x
+conv2d_bf16.h 1123 0x141a 47
+conv2d_bf16.h 896 0x1420
+conv2d_bf16.h 896 0x1424
+conv2d_bf16.h 896 0x1428
+conv2d_bf16.h 896 0x142c
+conv2d_bf16.h 896 0x1430
+conv2d_bf16.h 896 0x1434
+conv2d_bf16.h 897 0x1438 x
+conv2d_bf16.h 897 0x143c
+conv2d_bf16.h 897 0x1440
+conv2d_bf16.h 897 0x1444
+conv2d_bf16.h 897 0x1448
+conv2d_bf16.h 897 0x144c
+conv2d_bf16.h 897 0x1450
+conv2d_bf16.h 898 0x1454 x
+conv2d_bf16.h 898 0x1458
+conv2d_bf16.h 898 0x145c
+conv2d_bf16.h 898 0x1460
+conv2d_bf16.h 898 0x1464
+conv2d_bf16.h 898 0x1468
+conv2d_bf16.h 1115 0x146c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1470
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 898 0x1474 x
+conv2d_bf16.h 1115 0x1480 x
+conv2d_bf16.h 1115 0x1484
+conv2d_bf16.h 886 0x148a
+conv2d_bf16.h 886 0x1490 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1494 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x149c
+conv2d_bf16.h 1123 0x149c 1
+conv2d_bf16.h 1123 0x149c 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14a6
+aie_core.h 100 0x14a6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14a6 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14a6 3
+accum.hpp 946 0x14a6 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14a6 5
+conv2d_bf16.h 1125 0x14a6 6
+conv2d_bf16.h 1154 0x14a6 7
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14b0
+aie_core.h 100 0x14b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14b0 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x14b0 3
+accum.hpp 946 0x14b0 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1123 0x14b0 5
+conv2d_bf16.h 1125 0x14b0 6
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14ba
+aie_core.h 100 0x14ba 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x14ba 2
+vector.hpp 1152 0x14ba 3
+vector.hpp 1152 0x14ba 4
+vector.hpp 1152 0x14ba 5
+vector.hpp 1152 0x14ba 6
+vector.hpp 1152 0x14ba 7
+vector.hpp 1152 0x14ba 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x14ba 9
+accum.hpp 149 0x14ba 10
+accum.hpp 149 0x14ba 11
+accum.hpp 149 0x14ba 12
+accum.hpp 149 0x14ba 13
+accum.hpp 149 0x14ba 14
+accum.hpp 149 0x14ba 15
+accum.hpp 149 0x14ba 16
+accum.hpp 578 0x14ba 17
+accum.hpp 946 0x14ba 18
+accum.hpp 1110 0x14ba 19
+accum.hpp 1110 0x14ba 20
+accum.hpp 1110 0x14ba 21
+accum.hpp 1110 0x14ba 22
+accum.hpp 1110 0x14ba 23
+accum.hpp 1110 0x14ba 24
+accum.hpp 1110 0x14ba 25
+accum.hpp 1110 0x14ba 26
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x14ba 27
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x14c6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x14c6 1
+conv2d_bf16.h 1187 0x14c6 2
+conv2d_bf16.h 1199 0x14c6 3
+conv2d_bf16.h 1200 0x14c6 4
+conv2d_bf16.h 1201 0x14c6 5
+conv2d_bf16.h 1202 0x14c6 6
+conv2d_bf16.h 1143 0x14d2
+conv2d_bf16.h 1218 0x14d2 1
+conv2d_bf16.h 749 0x14dc
+conv2d_bf16.h 750 0x14dc 1
+conv2d_bf16.h 751 0x14dc 2
+conv2d_bf16.h 752 0x14dc 3
+conv2d_bf16.h 1123 0x14dc 4
+conv2d_bf16.h 736 0x14e6
+conv2d_bf16.h 738 0x14e6 1
+conv2d_bf16.h 1123 0x14e6 2
+conv2d_bf16.h 1873 0x14e6 3
+conv2d_bf16.h 1125 0x14f2 x
+conv2d_bf16.h 1125 0x14f6
+conv2d_bf16.h 1125 0x14fa
+conv2d_bf16.h 1149 0x14fe x
+conv2d_bf16.h 1154 0x1502 x
+conv2d_bf16.h 743 0x1506 x
+conv2d_bf16.h 745 0x150a x
+conv2d_bf16.h 746 0x150e x
+conv2d_bf16.h 1125 0x150e 1 x
+conv2d_bf16.h 1143 0x1514 x
+conv2d_bf16.h 1206 0x1518 x
+conv2d_bf16.h 1149 0x151c
+conv2d_bf16.h 1154 0x1524
+conv2d_bf16.h 1125 0x1528 x
+conv2d_bf16.h 1149 0x152c x
+conv2d_bf16.h 1154 0x1530 x
+conv2d_bf16.h 1287 0x1536
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1540 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1540 1 x
+accum.hpp 946 0x1540 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x1540 3
+conv2d_bf16.h 738 0x1540 4
+conv2d_bf16.h 1147 0x1540 5 x
+conv2d_bf16.h 1187 0x1540 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x154c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x154c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x154c 2
+accum.hpp 946 0x154c 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x154c 4 x
+conv2d_bf16.h 738 0x154c 5 x
+conv2d_bf16.h 1188 0x154c 6 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1558
+aie_core.h 100 0x1558 1
+aie_core.h 100 0x1558 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1558 3
+vector.hpp 1139 0x1558 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1558 5
+accum.hpp 578 0x1558 6
+accum.hpp 946 0x1558 7
+accum.hpp 946 0x1558 8
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1558 9 x
+conv2d_bf16.h 742 0x1558 10 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1564
+aie_core.h 100 0x1564 1
+aie_core.h 100 0x1564 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1564 3
+vector.hpp 1139 0x1564 4
+vector.hpp 1139 0x1564 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1564 6
+accum.hpp 578 0x1564 7
+accum.hpp 578 0x1564 8 x
+accum.hpp 946 0x1564 9
+accum.hpp 946 0x1564 10
+accum.hpp 946 0x1564 11 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1564 12 x
+conv2d_bf16.h 1149 0x1564 13 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1570
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1570 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1570 2
+accum.hpp 946 0x1570 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1570 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x1570 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x157a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x157a 1 x
+accum.hpp 946 0x157a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x157a 3 x
+conv2d_bf16.h 1152 0x157a 4 x
+conv2d_bf16.h 1206 0x157a 5 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1586
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1586 1
+accum.hpp 946 0x1586 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 737 0x1586 3 x
+conv2d_bf16.h 1154 0x1586 4 x
+conv2d_bf16.h 1206 0x1586 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1592 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1592 1 x
+accum.hpp 946 0x1592 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x1592 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1598
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1598 1
+accum.hpp 946 0x1598 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1598 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1157 0x1598 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x159e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x159e 1 x
+accum.hpp 946 0x159e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x159e 3 x
+conv2d_bf16.h 1159 0x159e 4 x
+conv2d_bf16.h 737 0x15a4 x
+conv2d_bf16.h 738 0x15a4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15aa x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15aa 1 x
+accum.hpp 946 0x15aa 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 740 0x15aa 3 x
+conv2d_bf16.h 1192 0x15aa 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15b0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15b0 1
+accum.hpp 946 0x15b0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x15b0 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15b0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ba
+vector.hpp 1139 0x15ba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ba 2
+accum.hpp 578 0x15ba 3 x
+accum.hpp 946 0x15ba 4
+accum.hpp 946 0x15ba 5 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x15ba 6 x
+conv2d_bf16.h 746 0x15ba 7 x
+conv2d_bf16.h 1162 0x15ba 8
+conv2d_bf16.h 737 0x15c6 x
+conv2d_bf16.h 742 0x15c6 1 x
+conv2d_bf16.h 749 0x15c6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15d0 x
+aie_core.h 143 0x15d0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15d0 2 x
+vector.hpp 1152 0x15d0 3
+vector.hpp 1152 0x15d0 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15d0 5 x
+accum.hpp 946 0x15d0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x15d0 7 x
+conv2d_bf16.h 1286 0x15d0 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15de
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15de 1
+vector.hpp 1139 0x15de 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15de 3
+accum.hpp 578 0x15de 4
+accum.hpp 946 0x15de 5
+accum.hpp 946 0x15de 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x15de 7 x
+conv2d_bf16.h 751 0x15de 8 x
+conv2d_bf16.h 1162 0x15de 9 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x15ec
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x15ec 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x15ec 2
+accum.hpp 946 0x15ec 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x15ec 4 x
+conv2d_bf16.h 746 0x15ec 5 x
+conv2d_bf16.h 1199 0x15ec 6 x
+conv2d_bf16.h 738 0x15fa x
+conv2d_bf16.h 1200 0x15fa 1 x
+conv2d_bf16.h 742 0x1602 x
+conv2d_bf16.h 1201 0x1602 1 x
+conv2d_bf16.h 743 0x160a x
+conv2d_bf16.h 752 0x160a 1 x
+conv2d_bf16.h 738 0x1612 x
+conv2d_bf16.h 740 0x1612 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1618 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x161c x
+conv2d_bf16.h 742 0x161c 1 x
+conv2d_bf16.h 1202 0x161c 2 x
+conv2d_bf16.h 1206 0x161c 3 x
+conv2d_bf16.h 737 0x1628 x
+conv2d_bf16.h 743 0x1628 1 x
+conv2d_bf16.h 749 0x1628 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1632
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1632 1 x
+conv2d_bf16.h 740 0x1632 2 x
+conv2d_bf16.h 751 0x1632 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1640 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1640 1 x
+conv2d_bf16.h 750 0x1640 2 x
+conv2d_bf16.h 736 0x1650 x
+conv2d_bf16.h 742 0x1650 1 x
+conv2d_bf16.h 746 0x1650 2 x
+conv2d_bf16.h 752 0x1650 3 x
+conv2d_bf16.h 737 0x1660 x
+conv2d_bf16.h 743 0x1660 1 x
+conv2d_bf16.h 749 0x1660 2 x
+conv2d_bf16.h 738 0x1670 x
+conv2d_bf16.h 740 0x1670 1 x
+conv2d_bf16.h 751 0x1670 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1680 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x1680 1 x
+conv2d_bf16.h 750 0x1680 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1690
+aie_core.h 100 0x1690 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1690 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1690 3
+accum.hpp 946 0x1690 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 742 0x1690 5 x
+conv2d_bf16.h 746 0x1690 6 x
+conv2d_bf16.h 752 0x1690 7 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x169e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x169e 1
+vector.hpp 1152 0x169e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 743 0x169e 3 x
+conv2d_bf16.h 749 0x169e 4 x
+conv2d_bf16.h 1286 0x169e 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ac 1
+vector.hpp 1152 0x16ac 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16ac 3 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16b6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 745 0x16b6 1 x
+conv2d_bf16.h 750 0x16b6 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16c0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 746 0x16c0 1 x
+conv2d_bf16.h 752 0x16c0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x16ca
+vector.hpp 1152 0x16ca 1
+vector.hpp 1152 0x16ca 2
+vector.hpp 1152 0x16ca 3
+vector.hpp 1152 0x16ca 4
+vector.hpp 1152 0x16ca 5
+vector.hpp 1152 0x16ca 6
+vector.hpp 1152 0x16ca 7
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16ca 8 x
+conv2d_bf16.h 1285 0x16ca 9 x
+conv2d_bf16.h 1286 0x16ca 10
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x16d6
+aie_core.h 100 0x16d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x16d6 2
+vector.hpp 1152 0x16d6 3
+vector.hpp 1152 0x16d6 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x16d6 5
+accum.hpp 946 0x16d6 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x16d6 7 x
+conv2d_bf16.h 746 0x16e0 x
+conv2d_bf16.h 750 0x16e0 1 x
+conv2d_bf16.h 745 0x16e8 x
+conv2d_bf16.h 752 0x16e8 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16f0
+aie_core.h 143 0x16f4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 750 0x16f4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x16fc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 749 0x16fc 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1704 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 752 0x1704 1 x
+conv2d_bf16.h 1286 0x1704 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x170e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x170e 1
+vector.hpp 1152 0x170e 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 751 0x170e 3 x
+conv2d_bf16.h 1286 0x170e 4 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x171a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x171a 1
+vector.hpp 1152 0x171a 2
+vector.hpp 1152 0x171a 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x171a 4
+accum.hpp 946 0x171a 5
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1722
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1722 1
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x172a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x172a 1 x
+accum.hpp 1110 0x172a 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1732
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1732 1
+accum.hpp 1110 0x1732 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x173a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 736 0x173a 1
+conv2d_bf16.h 1287 0x173a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1744 x
+accum.hpp 1110 0x1744 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1287 0x1744 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x174c
+accum.hpp 1110 0x174c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1218 0x174c 2 x
+conv2d_bf16.h 1287 0x174c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1758 x
+accum.hpp 1110 0x1758 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 738 0x1758 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1760
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1760 1
+accum.hpp 1110 0x1760 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1768
+vector.hpp 1152 0x1768 1
+vector.hpp 1152 0x1768 2
+vector.hpp 1152 0x1768 3
+vector.hpp 1152 0x1768 4
+vector.hpp 1152 0x1768 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1768 6
+accum.hpp 1110 0x1768 7
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1772
+vector.hpp 1152 0x1772 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1772 2 x
+accum.hpp 1110 0x1772 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1286 0x1772 4
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x177a
+aie_core.h 143 0x177a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x177a 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x177a 3
+accum.hpp 946 0x177a 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1187 0x177a 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1782 x
+max_min.hpp 20 0x1786
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x178a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x178a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1792
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1792 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x179a x
+vector.hpp 1152 0x17a4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17a4 1 x
+max_min.hpp 20 0x17ac
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17b8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x17b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17c0 x
+vector.hpp 1152 0x17d0
+vector.hpp 1152 0x17d4
+vector.hpp 1152 0x17d8
+vector.hpp 1152 0x17dc
+vector.hpp 1152 0x17e0
+vector.hpp 1152 0x17e4
+vector.hpp 1152 0x17e8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17f0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x17f0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1143 0x17f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x17fc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x17fc 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x17fc 2
+accum.hpp 946 0x17fc 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 100 0x1800
+aie_core.h 100 0x1804 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1804 1
+vector.hpp 1152 0x1804 2
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x180a
+aie_core.h 143 0x1820
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x1820 1
+conv2d_bf16.h 1364 0x1820 2
+conv2d_bf16.h 1364 0x1820 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x182c
+aie_core.h 143 0x182c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x182c 2
+vector.hpp 1152 0x182c 3
+vector.hpp 1152 0x182c 4
+vector.hpp 1152 0x182c 5
+vector.hpp 1152 0x182c 6
+vector.hpp 1152 0x182c 7
+vector.hpp 1152 0x182c 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x182c 9
+accum.hpp 149 0x182c 10
+accum.hpp 149 0x182c 11
+accum.hpp 149 0x182c 12
+accum.hpp 149 0x182c 13
+accum.hpp 149 0x182c 14
+accum.hpp 149 0x182c 15
+accum.hpp 149 0x182c 16
+accum.hpp 1110 0x182c 17
+accum.hpp 1110 0x182c 18
+accum.hpp 1110 0x182c 19
+accum.hpp 1110 0x182c 20
+accum.hpp 1110 0x182c 21
+accum.hpp 1110 0x182c 22
+accum.hpp 1110 0x182c 23
+accum.hpp 1110 0x182c 24
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x182c 25
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1838
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 807 0x1838 1
+conv2d_bf16.h 808 0x1838 2
+conv2d_bf16.h 809 0x1838 3
+conv2d_bf16.h 810 0x1838 4
+conv2d_bf16.h 1436 0x1838 5
+conv2d_bf16.h 1437 0x1838 6
+conv2d_bf16.h 1438 0x1838 7
+conv2d_bf16.h 1439 0x1838 8
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x1842
+aie_core.h 143 0x1842 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 802 0x1842 2
+conv2d_bf16.h 1428 0x1842 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x184e
+aie_core.h 143 0x184e 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x184e 2
+conv2d_bf16.h 794 0x184e 3
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x185a
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 794 0x185a 1
+conv2d_bf16.h 1455 0x185a 2
+conv2d_bf16.h 1337 0x1864
+conv2d_bf16.h 1364 0x186e x
+conv2d_bf16.h 1873 0x186e 1
+conv2d_bf16.h 1364 0x1874
+conv2d_bf16.h 1369 0x1878 x
+conv2d_bf16.h 799 0x187c x
+conv2d_bf16.h 801 0x1880 x
+conv2d_bf16.h 802 0x1884 x
+conv2d_bf16.h 1337 0x1888 x
+conv2d_bf16.h 1443 0x188c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1892
+vector.hpp 1152 0x1892 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x1892 2
+conv2d_bf16.h 1364 0x1896
+conv2d_bf16.h 1518 0x1896 1
+conv2d_bf16.h 1364 0x189a
+conv2d_bf16.h 1364 0x189e x
+conv2d_bf16.h 1369 0x18a2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x18a8
+vector.hpp 1152 0x18a8 1
+vector.hpp 1139 0x18b0
+vector.hpp 1139 0x18b0 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18b0 2
+accum.hpp 578 0x18b0 3
+accum.hpp 578 0x18b0 4 x
+accum.hpp 946 0x18b0 5
+accum.hpp 946 0x18b0 6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18b0 7 x
+conv2d_bf16.h 1362 0x18b0 8 x
+conv2d_bf16.h 1429 0x18b0 9
+conv2d_bf16.h 1443 0x18b0 10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18be
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18be 1
+accum.hpp 946 0x18be 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18be 3 x
+conv2d_bf16.h 1364 0x18be 4 x
+conv2d_bf16.h 1443 0x18be 5
+conv2d_bf16.h 794 0x18ca x
+conv2d_bf16.h 795 0x18ca 1 x
+conv2d_bf16.h 1428 0x18ca 2 x
+conv2d_bf16.h 1443 0x18ca 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18d6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18d6 1
+accum.hpp 578 0x18d6 2
+accum.hpp 946 0x18d6 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18d6 4 x
+conv2d_bf16.h 799 0x18d6 5 x
+conv2d_bf16.h 1429 0x18d6 6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e0 1 x
+accum.hpp 946 0x18e0 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x18e0 3 x
+conv2d_bf16.h 1367 0x18e0 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18e6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18e6 1
+accum.hpp 946 0x18e6 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x18e6 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1369 0x18e6 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18ec x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18ec 1 x
+accum.hpp 946 0x18ec 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 792 0x18ec 3 x
+conv2d_bf16.h 1372 0x18ec 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f2 1
+accum.hpp 946 0x18f2 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 793 0x18f2 3 x
+conv2d_bf16.h 1374 0x18f2 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18f8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18f8 1 x
+accum.hpp 946 0x18f8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 795 0x18f8 3 x
+conv2d_bf16.h 1377 0x18f8 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x18fe
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x18fe 1
+accum.hpp 946 0x18fe 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 796 0x18fe 3 x
+conv2d_bf16.h 1379 0x18fe 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1904 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x1904 1 x
+accum.hpp 946 0x1904 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 797 0x1904 3 x
+conv2d_bf16.h 1429 0x1904 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x190a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 578 0x190a 1
+accum.hpp 946 0x190a 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x190a 3 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x190a 4 x
+conv2d_bf16.h 1429 0x190a 5
+conv2d_bf16.h 792 0x1914 x
+conv2d_bf16.h 794 0x1914 1 x
+conv2d_bf16.h 802 0x1914 2 x
+conv2d_bf16.h 793 0x191e x
+conv2d_bf16.h 799 0x191e 1 x
+conv2d_bf16.h 803 0x191e 2 x
+conv2d_bf16.h 807 0x191e 3 x
+conv2d_bf16.h 794 0x192a x
+conv2d_bf16.h 804 0x192a 1 x
+conv2d_bf16.h 808 0x192a 2 x
+conv2d_bf16.h 809 0x1934 x
+conv2d_bf16.h 810 0x1938 x
+conv2d_bf16.h 795 0x193c x
+conv2d_bf16.h 802 0x193c 1 x
+conv2d_bf16.h 1437 0x193c 2 x
+conv2d_bf16.h 796 0x1946 x
+conv2d_bf16.h 1436 0x1946 1 x
+conv2d_bf16.h 797 0x194e x
+conv2d_bf16.h 1438 0x194e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1956 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1956 1 x
+conv2d_bf16.h 1439 0x1956 2 x
+conv2d_bf16.h 792 0x1960 x
+conv2d_bf16.h 801 0x1960 1 x
+conv2d_bf16.h 793 0x1966 x
+conv2d_bf16.h 804 0x1966 1 x
+conv2d_bf16.h 808 0x1966 2 x
+conv2d_bf16.h 795 0x1970 x
+conv2d_bf16.h 803 0x1970 1 x
+conv2d_bf16.h 807 0x1970 2 x
+conv2d_bf16.h 796 0x197a x
+conv2d_bf16.h 810 0x197a 1 x
+conv2d_bf16.h 794 0x1982 x
+conv2d_bf16.h 797 0x1982 1 x
+conv2d_bf16.h 809 0x1982 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h:
+kernel_helpers.h 978 0x1990 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 799 0x1990 1 x
+conv2d_bf16.h 802 0x1990 2 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19a0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19a0 1
+vector.hpp 1152 0x19a0 2
+vector.hpp 1152 0x19a0 3
+vector.hpp 1152 0x19a0 4
+vector.hpp 1152 0x19a0 5
+vector.hpp 1152 0x19a0 6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 801 0x19a0 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19ac
+vector.hpp 1152 0x19ac 1
+vector.hpp 1152 0x19ac 2
+vector.hpp 1152 0x19ac 3
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 804 0x19ac 4 x
+conv2d_bf16.h 808 0x19ac 5 x
+conv2d_bf16.h 1517 0x19ac 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x19b8
+vector.hpp 1152 0x19b8 1
+vector.hpp 1152 0x19b8 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 803 0x19b8 3 x
+conv2d_bf16.h 807 0x19b8 4 x
+conv2d_bf16.h 1518 0x19b8 5 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 810 0x19c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h:
+aie_core.h 143 0x19cc x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 809 0x19cc 1 x
+conv2d_bf16.h 1428 0x19cc 2
+conv2d_bf16.h 801 0x19d6 x
+conv2d_bf16.h 802 0x19da x
+conv2d_bf16.h 803 0x19de x
+conv2d_bf16.h 807 0x19de 1 x
+conv2d_bf16.h 804 0x19e6 x
+conv2d_bf16.h 808 0x19e6 1 x
+conv2d_bf16.h 809 0x19ee x
+conv2d_bf16.h 810 0x19f2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x19fa x
+accum.hpp 1110 0x19fa 1 x
+accum.hpp 149 0x19fe
+accum.hpp 1110 0x19fe 1
+accum.hpp 149 0x1a02
+accum.hpp 1110 0x1a02 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1455 0x1a02 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 149 0x1a0c x
+accum.hpp 1110 0x1a0c 1 x
+accum.hpp 149 0x1a10
+accum.hpp 1110 0x1a10 1
+accum.hpp 149 0x1a14
+accum.hpp 1110 0x1a14 1
+accum.hpp 149 0x1a18
+accum.hpp 1110 0x1a18 1
+accum.hpp 149 0x1a1c
+accum.hpp 1110 0x1a1c 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a20 x
+max_min.hpp 20 0x1a24
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a28 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a28 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a30
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a30 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a38 x
+vector.hpp 1152 0x1a42
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a42 1 x
+max_min.hpp 20 0x1a4a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a4e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a4e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a56
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x1a56 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1152 0x1a60 x
+vector.hpp 1152 0x1a70
+vector.hpp 1152 0x1a74
+vector.hpp 1152 0x1a78
+vector.hpp 1152 0x1a7c
+vector.hpp 1152 0x1a80
+vector.hpp 1152 0x1a84
+vector.hpp 1152 0x1a88
+vector.hpp 1152 0x1a90
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h:
+conv2d_bf16.h 1337 0x1a90 1 x
+conv2d_bf16.h 1873 0x1ac8 x
+conv2d_bf16.h 1873 0x1acc
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 74 0x1ae0 x
+superkernels.cpp 79 0x1ae0 1
+superkernels.cpp 81 0x1ae0 2
+superkernels.cpp 79 0x1aea x
+superkernels.cpp 81 0x1aea 1
+superkernels.cpp 74 0x1af4
+superkernels.cpp 79 0x1b06
+superkernels.cpp 79 0x1b06 1
+superkernels.cpp 81 0x1b1c
+superkernels.cpp 113 0x1b22
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b22 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b2c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b2c 1
+tile.hpp 86 0x1b2c 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b3c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x1b44
+tile.hpp 74 0x1b48
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1b4c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x1b4c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 81 0x1b54
+superkernels.cpp 81 0x1b60
+superkernels.cpp 87 0x1b64
+superkernels.cpp 87 0x1b64 1 x
+superkernels.cpp 88 0x1b6e x
+superkernels.cpp 89 0x1b6e 1
+superkernels.cpp 88 0x1b78
+superkernels.cpp 88 0x1b7e
+superkernels.cpp 87 0x1b86 x
+superkernels.cpp 113 0x1b86 1
+superkernels.cpp 88 0x1b8e x
+superkernels.cpp 88 0x1b94
+superkernels.cpp 89 0x1b9a x
+superkernels.cpp 89 0x1ba0
+superkernels.cpp 113 0x1ba0 1
+superkernels.cpp 106 0x1bb0
+superkernels.cpp 113 0x1bb0 1
+superkernels.cpp 117 0x1bb0 2
+superkernels.cpp 136 0x1bb0 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bb0 4
+io_buffer_main.h 324 0x1bb0 5
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 106 0x1bba x
+superkernels.cpp 108 0x1bba 1
+superkernels.cpp 107 0x1bc4
+superkernels.cpp 108 0x1bc4 1 x
+superkernels.cpp 139 0x1bc4 2
+superkernels.cpp 140 0x1bc4 3
+superkernels.cpp 107 0x1bce x
+superkernels.cpp 110 0x1bda x
+superkernels.cpp 110 0x1bda 1 x
+superkernels.cpp 108 0x1be0 x
+superkernels.cpp 107 0x1be4 x
+superkernels.cpp 108 0x1be4 1
+superkernels.cpp 106 0x1bea x
+superkernels.cpp 106 0x1bee
+superkernels.cpp 107 0x1bf2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x1bf6 x
+io_buffer_main.h 218 0x1bfa
+io_buffer_main.h 218 0x1bfe
+io_buffer_main.h 218 0x1c02
+io_buffer_main.h 235 0x1c08 x
+io_buffer_main.h 218 0x1c14 x
+io_buffer_main.h 218 0x1c14 1 x
+io_buffer_main.h 218 0x1c18
+io_buffer_main.h 395 0x1c1c
+io_buffer_main.h 395 0x1c26 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 113 0x1c30 x
+superkernels.cpp 113 0x1c36
+superkernels.cpp 113 0x1c42
+superkernels.cpp 117 0x1c50 x
+superkernels.cpp 117 0x1c50 1
+superkernels.cpp 117 0x1c5a
+superkernels.cpp 117 0x1c6c
+superkernels.cpp 117 0x1c70
+superkernels.cpp 136 0x1c76
+superkernels.cpp 140 0x1c76 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x1c82 x
+io_buffer_main.h 327 0x1c82 1
+io_buffer_main.h 425 0x1c82 2
+io_buffer_main.h 324 0x1c88
+io_buffer_main.h 425 0x1c98 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 136 0x1c9c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x1c9c 1 x
+io_buffer_main.h 327 0x1cae
+io_buffer_main.h 327 0x1cb2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 139 0x1cc0 x
+superkernels.cpp 139 0x1cc0 1
+superkernels.cpp 139 0x1cca
+superkernels.cpp 142 0x1cd2
+superkernels.cpp 139 0x1cde
+superkernels.cpp 139 0x1ce2
+superkernels.cpp 140 0x1cf4 x
+superkernels.cpp 142 0x1d04 x
+superkernels.cpp 142 0x1d08
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x1d20 x
+elementwise_binary.h 142 0x1d20 1
+elementwise_binary.h 144 0x1d20 2 x
+elementwise_binary.h 141 0x1d26
+elementwise_binary.h 141 0x1d2a
+elementwise_binary.h 142 0x1d2e x
+elementwise_binary.h 142 0x1d32
+elementwise_binary.h 130 0x1d40 x
+elementwise_binary.h 133 0x1d40 1 x
+elementwise_binary.h 130 0x1d44
+elementwise_binary.h 133 0x1d58 x
+elementwise_binary.h 134 0x1d5c x
+elementwise_binary.h 134 0x1d6c
+elementwise_binary.h 135 0x1d70 x
+elementwise_binary.h 135 0x1d80
+elementwise_binary.h 136 0x1d84 x
+elementwise_binary.h 137 0x1d8c x
+elementwise_binary.h 136 0x1d98 x
+elementwise_binary.h 137 0x1d9c
+elementwise_binary.h 137 0x1da0
+elementwise_binary.h 139 0x1da0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 146 0x1da0 2
+add_impl.h 146 0x1daa
+add_impl.h 147 0x1daa 1
+add_impl.h 147 0x1daa 2
+add_impl.h 146 0x1db4 x
+add_impl.h 147 0x1db4 1
+add_impl.h 147 0x1dbe x
+add_impl.h 147 0x1dc6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dca x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dce
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x1dd2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x1dd8 x
+add_impl.h 147 0x1ddc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 81 0x1df0
+elementwise_binary_broadcasting.h 81 0x1df0 1 x
+elementwise_binary_broadcasting.h 82 0x1df0 2
+elementwise_binary_broadcasting.h 82 0x1df0 3
+elementwise_binary_broadcasting.h 83 0x1df0 4
+elementwise_binary_broadcasting.h 81 0x1dfa
+elementwise_binary_broadcasting.h 82 0x1dfa 1
+elementwise_binary_broadcasting.h 82 0x1e00 x
+elementwise_binary_broadcasting.h 84 0x1e0e x
+elementwise_binary_broadcasting.h 82 0x1e12 x
+elementwise_binary_broadcasting.h 83 0x1e16 x
+elementwise_binary_broadcasting.h 82 0x1e1a x
+elementwise_binary_broadcasting.h 83 0x1e1a 1
+elementwise_binary_broadcasting.h 82 0x1e20
+elementwise_binary_broadcasting.h 82 0x1e24
+elementwise_binary_broadcasting.h 76 0x1e30
+elementwise_binary_broadcasting.h 76 0x1e30 1 x
+elementwise_binary_broadcasting.h 77 0x1e3a x
+elementwise_binary_broadcasting.h 78 0x1e44
+elementwise_binary_broadcasting.h 78 0x1e54
+elementwise_binary_broadcasting.h 78 0x1e58 x
+elementwise_binary_broadcasting.h 78 0x1e5e
+elementwise_binary_broadcasting.h 79 0x1e62 x
+elementwise_binary_broadcasting.h 89 0x1e70 x
+elementwise_binary_broadcasting.h 96 0x1e70 1 x
+elementwise_binary_broadcasting.h 102 0x1e70 2
+elementwise_binary_broadcasting.h 102 0x1e76 x
+elementwise_binary_broadcasting.h 117 0x1e76 1
+elementwise_binary_broadcasting.h 102 0x1e88
+elementwise_binary_broadcasting.h 102 0x1e88 1
+elementwise_binary_broadcasting.h 96 0x1e8e
+elementwise_binary_broadcasting.h 96 0x1e92 x
+elementwise_binary_broadcasting.h 103 0x1e9c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1eb0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1eb6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 106 0x1ec0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp:
+broadcast.hpp 56 0x1ed0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1ed6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1ee0
+add_accum.hpp 19 0x1ee0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 117 0x1ee0 2 x
+elementwise_binary_broadcasting.h 117 0x1ee0 3 x
+elementwise_binary_broadcasting.h 117 0x1eea
+elementwise_binary_broadcasting.h 117 0x1eea 1
+elementwise_binary_broadcasting.h 117 0x1ef4
+elementwise_binary_broadcasting.h 117 0x1efa
+elementwise_binary_broadcasting.h 117 0x1f00
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f08 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f08 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f08 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f0c
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f0c 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f0c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f10 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f10 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f10 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f14
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f14 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f14 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f18 x
+vector.hpp 1159 0x1f18 1
+vector.hpp 1159 0x1f18 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f18 3 x
+accum.hpp 1110 0x1f18 4
+accum.hpp 1110 0x1f18 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f18 6 x
+elementwise_binary.h 195 0x1f18 7
+elementwise_binary.h 218 0x1f18 8
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f1e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f1e 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f1e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f1e 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f26 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f26 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f26 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f2a
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f2a 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f2a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f32 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f32 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f32 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f36
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f36 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f36 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f36 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f3e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f42
+vector.hpp 1159 0x1f42 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f42 2
+accum.hpp 1110 0x1f42 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f42 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f42 5 x
+elementwise_binary.h 218 0x1f42 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f50 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f50 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x1f50 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f54
+vector.hpp 1159 0x1f54 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f54 2
+accum.hpp 1110 0x1f54 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f54 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x1f54 5 x
+elementwise_binary.h 195 0x1f54 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f60 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f60 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x1f60 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x1f70
+vector.hpp 1159 0x1f70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 946 0x1f70 2
+accum.hpp 1110 0x1f70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 213 0x1f70 5 x
+elementwise_binary.h 218 0x1f70 6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f82
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f82 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f82 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f82 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f8c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f8c 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp:
+add_accum.hpp 19 0x1f8c 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f8c 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f96
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f96 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1f96 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h:
+elementwise_binary_broadcasting.h 121 0x1f96 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1f9e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1f9e 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x1f9e 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x1fa4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x1fa4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x1fa4 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 82 0x1fb0
+ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x
+ise_binary_attribute_broadcasting.h 90 0x1fb6
+ise_binary_attribute_broadcasting.h 90 0x1fbe x
+ise_binary_attribute_broadcasting.h 117 0x1fbe 1
+ise_binary_attribute_broadcasting.h 92 0x1fc6 x
+ise_binary_attribute_broadcasting.h 92 0x1fc6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x1fd6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x1fd6 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h:
+ise_binary_attribute_broadcasting.h 117 0x1fe2 x
+ise_binary_attribute_broadcasting.h 92 0x1fe8
+ise_binary_attribute_broadcasting.h 92 0x1fee x
+ise_binary_attribute_broadcasting.h 92 0x1ff2
+ise_binary_attribute_broadcasting.h 117 0x1ff2 1
+ise_binary_attribute_broadcasting.h 117 0x1ff8
+ise_binary_attribute_broadcasting.h 118 0x2000
+ise_binary_attribute_broadcasting.h 118 0x2010 x
+ise_binary_attribute_broadcasting.h 118 0x2014
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 147 0x2030 x
+superkernels.cpp 152 0x2030 1
+superkernels.cpp 152 0x2036 x
+superkernels.cpp 147 0x203c
+superkernels.cpp 149 0x204a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2054
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 152 0x205c
+superkernels.cpp 152 0x205c 1
+superkernels.cpp 149 0x2062 x
+superkernels.cpp 149 0x2066
+superkernels.cpp 149 0x206e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x206e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x2076
+superkernels.cpp 166 0x2076 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x207c
+tile.hpp 74 0x2082
+tile.hpp 86 0x2082 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 155 0x208e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2098
+tile.hpp 74 0x209c
+tile.hpp 74 0x20a0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 159 0x20b0
+superkernels.cpp 159 0x20b6 x
+superkernels.cpp 159 0x20b6 1
+superkernels.cpp 157 0x20c0
+superkernels.cpp 159 0x20c0 1
+superkernels.cpp 166 0x20c0 2
+superkernels.cpp 157 0x20ca x
+superkernels.cpp 159 0x20ca 1
+superkernels.cpp 164 0x20ca 2
+superkernels.cpp 157 0x20de
+superkernels.cpp 159 0x20e6 x
+superkernels.cpp 157 0x20ea x
+superkernels.cpp 159 0x20f0 x
+superkernels.cpp 164 0x2100
+superkernels.cpp 166 0x2100 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2110 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 163 0x2118
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2118 1
+io_buffer_main.h 218 0x2122
+io_buffer_main.h 218 0x2126
+io_buffer_main.h 235 0x212a x
+io_buffer_main.h 218 0x2138 x
+io_buffer_main.h 218 0x2138 1 x
+io_buffer_main.h 218 0x213c
+io_buffer_main.h 395 0x2140
+io_buffer_main.h 395 0x214a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x214e
+superkernels.cpp 163 0x2158 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x215c x
+io_buffer_main.h 324 0x215c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 164 0x2162 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2166 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 166 0x216c x
+superkernels.cpp 163 0x2174 x
+superkernels.cpp 163 0x2178
+superkernels.cpp 164 0x217c x
+superkernels.cpp 164 0x2180
+superkernels.cpp 168 0x2190
+superkernels.cpp 169 0x2190 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2190 2 x
+io_buffer_main.h 327 0x219a
+io_buffer_main.h 425 0x219a 1
+io_buffer_main.h 425 0x21a8 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21ac 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21b6 x
+superkernels.cpp 168 0x21ba
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x21c6 x
+io_buffer_main.h 327 0x21ca
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 168 0x21ce x
+superkernels.cpp 168 0x21d2
+superkernels.cpp 169 0x21e2
+superkernels.cpp 169 0x21e6 x
+superkernels.cpp 171 0x21f0
+superkernels.cpp 171 0x2204 x
+superkernels.cpp 171 0x220c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 124 0x2220 x
+elementwise_unary.h 126 0x2220 1 x
+elementwise_unary.h 126 0x2230 x
+elementwise_unary.h 127 0x2234 x
+elementwise_unary.h 127 0x2244
+elementwise_unary.h 128 0x2248 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x224c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 128 0x225a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 113 0x225e x
+clip_impl.h 114 0x226e x
+clip_impl.h 114 0x2272
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 130 0x2276 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2290
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 136 0x2290 1 x
+elementwise_unary.h 142 0x2290 2
+elementwise_unary.h 154 0x2290 3 x
+elementwise_unary.h 171 0x2290 4
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x229c x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x229c 1
+elementwise_unary.h 154 0x229c 2 x
+elementwise_unary.h 190 0x229c 3 x
+elementwise_unary.h 136 0x22a8
+elementwise_unary.h 136 0x22ac x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 103 0x22b0 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22b4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22b8 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x22b8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h:
+clip_impl.h 104 0x22b8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22c4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22c4 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22cc x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22cc 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 142 0x22cc 2 x
+elementwise_unary.h 171 0x22cc 3 x
+elementwise_unary.h 154 0x22d6 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22de x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x22e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x22e2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x22e2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x22f0 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x22f0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x22f0 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2300 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2310 x
+vector.hpp 1159 0x2310 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2310 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 171 0x2310 3 x
+elementwise_unary.h 176 0x2310 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2320
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2320 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 190 0x2320 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2330 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2330 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x2330 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2340 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2350 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x2350 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x2350 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2358 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x235c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x235c 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x235c 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2364 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 158 0x2364 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x236a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 21 0x236a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x236a 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp:
+max_min.hpp 20 0x2372 x
+max_min.hpp 21 0x2376 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 195 0x237a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x237e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h:
+elementwise_unary.h 176 0x237e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 176 0x2390 x
+superkernels.cpp 181 0x2390 1
+superkernels.cpp 181 0x2396 x
+superkernels.cpp 176 0x239c
+superkernels.cpp 178 0x23aa
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x23b4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 181 0x23bc
+superkernels.cpp 181 0x23bc 1
+superkernels.cpp 178 0x23c2 x
+superkernels.cpp 178 0x23c6
+superkernels.cpp 178 0x23ce
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x23ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23d6
+superkernels.cpp 195 0x23d6 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23dc
+tile.hpp 74 0x23e2
+tile.hpp 86 0x23e2 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 184 0x23ee x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x23f8
+tile.hpp 74 0x23fc
+tile.hpp 74 0x2400 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 188 0x2410
+superkernels.cpp 188 0x2416 x
+superkernels.cpp 188 0x2416 1
+superkernels.cpp 186 0x2420
+superkernels.cpp 188 0x2420 1
+superkernels.cpp 195 0x2420 2
+superkernels.cpp 186 0x242a x
+superkernels.cpp 188 0x242a 1
+superkernels.cpp 193 0x242a 2
+superkernels.cpp 186 0x243e
+superkernels.cpp 188 0x2446 x
+superkernels.cpp 186 0x244a x
+superkernels.cpp 188 0x2450 x
+superkernels.cpp 193 0x2460
+superkernels.cpp 195 0x2460 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2470 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 192 0x2478
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2478 1
+io_buffer_main.h 218 0x2482
+io_buffer_main.h 218 0x2486
+io_buffer_main.h 235 0x248a x
+io_buffer_main.h 218 0x2498 x
+io_buffer_main.h 218 0x2498 1 x
+io_buffer_main.h 218 0x249c
+io_buffer_main.h 395 0x24a0
+io_buffer_main.h 395 0x24aa x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24ae
+superkernels.cpp 192 0x24b8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24bc x
+io_buffer_main.h 324 0x24bc 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 193 0x24c2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x24c6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 195 0x24cc x
+superkernels.cpp 192 0x24d4 x
+superkernels.cpp 192 0x24d8
+superkernels.cpp 193 0x24dc x
+superkernels.cpp 193 0x24e0
+superkernels.cpp 197 0x24f0
+superkernels.cpp 198 0x24f0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x24f0 2 x
+io_buffer_main.h 327 0x24fa
+io_buffer_main.h 425 0x24fa 1
+io_buffer_main.h 425 0x2508 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x250c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x250c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x2516 x
+superkernels.cpp 197 0x251a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2526 x
+io_buffer_main.h 327 0x252a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 197 0x252e x
+superkernels.cpp 197 0x2532
+superkernels.cpp 198 0x2542
+superkernels.cpp 198 0x2546 x
+superkernels.cpp 200 0x2550
+superkernels.cpp 200 0x2564 x
+superkernels.cpp 200 0x256c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 219 0x2600
+elementwise_binary_shared.h 219 0x2600 1 x
+elementwise_binary_shared.h 220 0x260a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2614
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2620
+elementwise_binary_shared.h 222 0x2632 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x263c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2640
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 193 0x2640 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x2870
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 237 0x2870 1 x
+elementwise_binary_shared.h 244 0x2870 2
+elementwise_binary_shared.h 245 0x2870 3
+elementwise_binary_shared.h 247 0x2870 4
+elementwise_binary_shared.h 250 0x2870 5
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x287a x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 244 0x287a 1 x
+elementwise_binary_shared.h 245 0x287a 2
+elementwise_binary_shared.h 247 0x287a 3
+elementwise_binary_shared.h 244 0x288c
+elementwise_binary_shared.h 244 0x288c 1
+elementwise_binary_shared.h 237 0x2892
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 538 0x28a0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp:
+vector_native_types.hpp 374 0x28a0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 247 0x28a6 x
+elementwise_binary_shared.h 245 0x28d0 x
+elementwise_binary_shared.h 245 0x28d6
+elementwise_binary_shared.h 245 0x28d6 1
+elementwise_binary_shared.h 250 0x28f0
+elementwise_binary_shared.h 250 0x28f4 x
+elementwise_binary_shared.h 250 0x28f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 205 0x2910 x
+superkernels.cpp 210 0x2910 1
+superkernels.cpp 210 0x2916 x
+superkernels.cpp 205 0x291c
+superkernels.cpp 207 0x292a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2934
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 210 0x293c
+superkernels.cpp 210 0x293c 1
+superkernels.cpp 207 0x2942 x
+superkernels.cpp 207 0x2946
+superkernels.cpp 207 0x294e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x294e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x2956
+superkernels.cpp 224 0x2956 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x295c
+tile.hpp 74 0x2962
+tile.hpp 86 0x2962 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 213 0x296e x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2978
+tile.hpp 74 0x297c
+tile.hpp 74 0x2980 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 217 0x2990
+superkernels.cpp 217 0x2996 x
+superkernels.cpp 217 0x2996 1
+superkernels.cpp 215 0x29a0
+superkernels.cpp 217 0x29a0 1
+superkernels.cpp 224 0x29a0 2
+superkernels.cpp 215 0x29aa x
+superkernels.cpp 217 0x29aa 1
+superkernels.cpp 222 0x29aa 2
+superkernels.cpp 215 0x29be
+superkernels.cpp 217 0x29c6 x
+superkernels.cpp 215 0x29ca x
+superkernels.cpp 217 0x29d0 x
+superkernels.cpp 222 0x29e0
+superkernels.cpp 224 0x29e0 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 221 0x29f8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x29f8 1
+io_buffer_main.h 218 0x2a02
+io_buffer_main.h 218 0x2a06
+io_buffer_main.h 235 0x2a0a x
+io_buffer_main.h 218 0x2a18 x
+io_buffer_main.h 218 0x2a18 1 x
+io_buffer_main.h 218 0x2a1c
+io_buffer_main.h 395 0x2a20
+io_buffer_main.h 395 0x2a2a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a2e
+superkernels.cpp 221 0x2a38 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a3c x
+io_buffer_main.h 324 0x2a3c 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 222 0x2a42 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2a46 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 224 0x2a4c x
+superkernels.cpp 221 0x2a54 x
+superkernels.cpp 221 0x2a58
+superkernels.cpp 222 0x2a5c x
+superkernels.cpp 222 0x2a60
+superkernels.cpp 226 0x2a70
+superkernels.cpp 227 0x2a70 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2a70 2 x
+io_buffer_main.h 327 0x2a7a
+io_buffer_main.h 425 0x2a7a 1
+io_buffer_main.h 425 0x2a88 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a8c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2a8c 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2a96 x
+superkernels.cpp 226 0x2a9a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2aa6 x
+io_buffer_main.h 327 0x2aaa
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 226 0x2aae x
+superkernels.cpp 226 0x2ab2
+superkernels.cpp 227 0x2ac2
+superkernels.cpp 227 0x2ac6 x
+superkernels.cpp 229 0x2ad0
+superkernels.cpp 229 0x2ae4 x
+superkernels.cpp 229 0x2aec
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 219 0x2b80
+elementwise_binary_shared.h 219 0x2b80 1 x
+elementwise_binary_shared.h 220 0x2b8a x
+elementwise_binary_shared.h 220 0x2b98
+elementwise_binary_shared.h 220 0x2ba0
+elementwise_binary_shared.h 222 0x2ba0 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 146 0x2ba0 2
+add_impl.h 146 0x2baa
+add_impl.h 147 0x2baa 1
+add_impl.h 147 0x2baa 2
+add_impl.h 146 0x2bb4 x
+add_impl.h 147 0x2bb4 1
+add_impl.h 147 0x2bbe x
+add_impl.h 147 0x2bc6
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2bca x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x2bce
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 222 0x2bd2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h:
+add_impl.h 147 0x2bd8 x
+add_impl.h 147 0x2bdc
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h:
+elementwise_binary_shared.h 227 0x2bf0 x
+elementwise_binary_shared.h 232 0x2bf0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 141 0x2c00 x
+elementwise_binary.h 142 0x2c00 1
+elementwise_binary.h 144 0x2c00 2 x
+elementwise_binary.h 141 0x2c06
+elementwise_binary.h 141 0x2c0a
+elementwise_binary.h 142 0x2c0e x
+elementwise_binary.h 142 0x2c12
+elementwise_binary.h 130 0x2c20 x
+elementwise_binary.h 133 0x2c20 1 x
+elementwise_binary.h 130 0x2c24
+elementwise_binary.h 133 0x2c36 x
+elementwise_binary.h 134 0x2c3a x
+elementwise_binary.h 134 0x2c4a
+elementwise_binary.h 135 0x2c4e x
+elementwise_binary.h 135 0x2c5e
+elementwise_binary.h 136 0x2c62 x
+elementwise_binary.h 137 0x2c6a x
+elementwise_binary.h 136 0x2c78 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2c7c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2c80
+elementwise_binary.h 139 0x2c92 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2c9c
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 139 0x2ca0
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h:
+mul_impl.h 134 0x2ca0 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 149 0x2cb0 x
+elementwise_binary.h 156 0x2cb0 1
+elementwise_binary.h 168 0x2cb0 2 x
+elementwise_binary.h 156 0x2cba x
+elementwise_binary.h 168 0x2cba 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2cc4
+mul_acc32_fp.hpp 36 0x2cc4 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 156 0x2cc4 2
+elementwise_binary.h 156 0x2cc4 3
+elementwise_binary.h 156 0x2cce
+elementwise_binary.h 156 0x2cce 1
+elementwise_binary.h 156 0x2cd8
+elementwise_binary.h 156 0x2ce2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2ce6 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 168 0x2ce6 1
+elementwise_binary.h 187 0x2ce6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cec
+vector.hpp 1139 0x2cec 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2cec 2 x
+elementwise_binary.h 211 0x2cec 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cf2 x
+vector.hpp 1139 0x2cf2 1 x
+vector.hpp 1159 0x2cf2 2
+vector.hpp 1159 0x2cf2 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2cf2 4
+accum.hpp 1110 0x2cf2 5
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2cf2 6 x
+elementwise_binary.h 195 0x2cf2 7
+elementwise_binary.h 213 0x2cf2 8 x
+elementwise_binary.h 218 0x2cf2 9
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2cfa
+vector.hpp 1139 0x2cfa 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2cfa 2 x
+elementwise_binary.h 211 0x2cfa 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d00 x
+vector.hpp 1139 0x2d00 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d00 2 x
+elementwise_binary.h 213 0x2d00 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d06
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 189 0x2d06 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d0a x
+vector.hpp 1139 0x2d0a 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d0a 2 x
+elementwise_binary.h 213 0x2d0a 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d10
+vector.hpp 1139 0x2d10 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d10 2 x
+elementwise_binary.h 189 0x2d10 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d16 x
+vector.hpp 1139 0x2d16 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d16 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d16 3 x
+elementwise_binary.h 213 0x2d16 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d20
+vector.hpp 1139 0x2d20 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d20 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d20 3 x
+elementwise_binary.h 189 0x2d20 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d2a x
+vector.hpp 1139 0x2d2a 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d2a 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d2a 3 x
+elementwise_binary.h 213 0x2d2a 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d34
+vector.hpp 1139 0x2d34 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d34 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d34 3 x
+elementwise_binary.h 189 0x2d34 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d3e x
+vector.hpp 1139 0x2d3e 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d3e 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d3e 3 x
+elementwise_binary.h 213 0x2d3e 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d48
+vector.hpp 1139 0x2d48 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d48 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d48 3 x
+elementwise_binary.h 189 0x2d48 4 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d52 x
+vector.hpp 1139 0x2d52 1 x
+vector.hpp 1159 0x2d52 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d52 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d52 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d52 5 x
+elementwise_binary.h 213 0x2d52 6 x
+elementwise_binary.h 218 0x2d52 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d60
+vector.hpp 1139 0x2d60 1
+vector.hpp 1159 0x2d60 2
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d60 3
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d60 4
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 187 0x2d60 5 x
+elementwise_binary.h 189 0x2d60 6 x
+elementwise_binary.h 195 0x2d60 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1139 0x2d70 x
+vector.hpp 1139 0x2d70 1 x
+vector.hpp 1159 0x2d70 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d70 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d70 4 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 211 0x2d70 5 x
+elementwise_binary.h 213 0x2d70 6 x
+elementwise_binary.h 218 0x2d70 7 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d80
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d80 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d80 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2d80 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d88 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d88 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d88 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2d88 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d90
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d90 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d90 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2d90 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2d98 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2d98 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2d98 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2d98 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2da0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2da0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2da0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2da0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2da8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2da8 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2da8 2 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2da8 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2db0
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2db0 1
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp:
+mul_acc32_fp.hpp 36 0x2db0 2
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2db0 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2db8 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2db8 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2db8 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dbc
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dbc 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 172 0x2dbc 2 x
+elementwise_binary.h 195 0x2dbc 3 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dc2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dc2 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2dc2 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dc6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dc6 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2dc6 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dca x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dca 1 x
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 218 0x2dca 2 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp:
+vector.hpp 1159 0x2dce
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp:
+accum.hpp 1110 0x2dce 1
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h:
+elementwise_binary.h 195 0x2dce 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 369 0x2de0 x
+superkernels.cpp 374 0x2de0 1
+superkernels.cpp 374 0x2de6 x
+superkernels.cpp 369 0x2dec
+superkernels.cpp 371 0x2df2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2df2 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 374 0x2e0e x
+superkernels.cpp 374 0x2e0e 1 x
+superkernels.cpp 371 0x2e14 x
+superkernels.cpp 371 0x2e18
+superkernels.cpp 371 0x2e1e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2e26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2e2a
+superkernels.cpp 379 0x2e2a 1
+superkernels.cpp 381 0x2e2a 2
+superkernels.cpp 393 0x2e2a 3
+superkernels.cpp 377 0x2e34
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2e34 1
+tile.hpp 74 0x2e3e
+tile.hpp 86 0x2e3e 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 377 0x2e4a x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x2e54
+tile.hpp 74 0x2e58
+tile.hpp 74 0x2e5c x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 381 0x2e60
+superkernels.cpp 381 0x2e60 1 x
+superkernels.cpp 381 0x2e6a
+superkernels.cpp 381 0x2e6a 1
+superkernels.cpp 390 0x2e6a 2
+superkernels.cpp 379 0x2e74 x
+superkernels.cpp 382 0x2e74 1
+superkernels.cpp 391 0x2e74 2
+superkernels.cpp 379 0x2e8a
+superkernels.cpp 381 0x2e90 x
+superkernels.cpp 379 0x2e94 x
+superkernels.cpp 381 0x2e98 x
+superkernels.cpp 382 0x2e9c x
+superkernels.cpp 390 0x2ea0
+superkernels.cpp 391 0x2ea6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eb0 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2eb4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eb4 1
+io_buffer_main.h 218 0x2ebe
+io_buffer_main.h 218 0x2ec2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2ec6 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 235 0x2eca x
+io_buffer_main.h 218 0x2ed6 x
+io_buffer_main.h 218 0x2ed6 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2eda x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x2eda 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 385 0x2ee0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 395 0x2ee4
+io_buffer_main.h 395 0x2ee4 1
+io_buffer_main.h 395 0x2eee x
+io_buffer_main.h 218 0x2ef2 x
+io_buffer_main.h 218 0x2efa
+io_buffer_main.h 218 0x2efe
+io_buffer_main.h 218 0x2f02
+io_buffer_main.h 235 0x2f06 x
+io_buffer_main.h 218 0x2f14 x
+io_buffer_main.h 218 0x2f14 1 x
+io_buffer_main.h 218 0x2f18
+io_buffer_main.h 395 0x2f24 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f28
+superkernels.cpp 391 0x2f28 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2f28 2
+io_buffer_main.h 125 0x2f36
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f3a x
+superkernels.cpp 391 0x2f40 x
+superkernels.cpp 393 0x2f40 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x2f46 x
+io_buffer_main.h 125 0x2f4a
+io_buffer_main.h 327 0x2f4e
+io_buffer_main.h 327 0x2f4e 1
+io_buffer_main.h 125 0x2f54
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 393 0x2f5a x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2f60
+io_buffer_main.h 327 0x2f60 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 390 0x2f64 x
+superkernels.cpp 391 0x2f68 x
+superkernels.cpp 391 0x2f6c
+superkernels.cpp 390 0x2f70 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x2f80 x
+io_buffer_main.h 327 0x2f80 1
+io_buffer_main.h 327 0x2f80 2
+io_buffer_main.h 327 0x2f80 3
+io_buffer_main.h 327 0x2f80 4
+io_buffer_main.h 425 0x2f80 5
+io_buffer_main.h 425 0x2f80 6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2f8a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 425 0x2f9a x
+io_buffer_main.h 327 0x2f9e x
+io_buffer_main.h 324 0x2fa2
+io_buffer_main.h 327 0x2fb0
+io_buffer_main.h 324 0x2fb4 x
+io_buffer_main.h 327 0x2fb4 1
+io_buffer_main.h 425 0x2fc6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fca
+superkernels.cpp 398 0x2fca 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2fca 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fd4 x
+superkernels.cpp 397 0x2fd8
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x2fe4 x
+io_buffer_main.h 327 0x2fe8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 397 0x2fec x
+superkernels.cpp 397 0x2ff0
+superkernels.cpp 398 0x3000
+superkernels.cpp 398 0x3004 x
+superkernels.cpp 400 0x3010
+superkernels.cpp 400 0x3026 x
+superkernels.cpp 400 0x302e
+
+/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h:
+conv2d_dw_bf16_params.h 211 0x3040 x
+conv2d_dw_bf16_params.h 215 0x3040 1
+conv2d_dw_bf16_params.h 215 0x3040 2 x
+conv2d_dw_bf16_params.h 215 0x304a x
+conv2d_dw_bf16_params.h 218 0x304a 1
+conv2d_dw_bf16_params.h 218 0x304a 2
+conv2d_dw_bf16_params.h 211 0x3054
+conv2d_dw_bf16_params.h 218 0x305a
+conv2d_dw_bf16_params.h 215 0x306e
+conv2d_dw_bf16_params.h 215 0x3072
+conv2d_dw_bf16_params.h 215 0x3076
+conv2d_dw_bf16_params.h 215 0x307a
+conv2d_dw_bf16_params.h 215 0x3088
+conv2d_dw_bf16_params.h 215 0x308c
+conv2d_dw_bf16_params.h 218 0x3090 x
+conv2d_dw_bf16_params.h 218 0x3094
+conv2d_dw_bf16_params.h 218 0x3098
+conv2d_dw_bf16_params.h 218 0x30a4
+conv2d_dw_bf16_params.h 218 0x30aa
+conv2d_dw_bf16_params.h 218 0x30b0
+conv2d_dw_bf16_params.h 218 0x30b6
+conv2d_dw_bf16_params.h 218 0x30bc
+conv2d_dw_bf16_params.h 218 0x30c0
+conv2d_dw_bf16_params.h 218 0x30d0
+conv2d_dw_bf16_params.h 218 0x30d0 1
+conv2d_dw_bf16_params.h 219 0x30d0 2
+conv2d_dw_bf16_params.h 218 0x30d6
+conv2d_dw_bf16_params.h 219 0x30d6 1 x
+conv2d_dw_bf16_params.h 219 0x30dc
+conv2d_dw_bf16_params.h 219 0x30e0
+conv2d_dw_bf16_params.h 218 0x30ea x
+conv2d_dw_bf16_params.h 218 0x30ee
+conv2d_dw_bf16_params.h 219 0x30f2 x
+conv2d_dw_bf16_params.h 219 0x30f8
+conv2d_dw_bf16_params.h 218 0x3102 x
+conv2d_dw_bf16_params.h 219 0x3106 x
+conv2d_dw_bf16_params.h 219 0x310a
+conv2d_dw_bf16_params.h 218 0x310e x
+conv2d_dw_bf16_params.h 218 0x3112
+conv2d_dw_bf16_params.h 219 0x3112 1 x
+conv2d_dw_bf16_params.h 219 0x3120
+conv2d_dw_bf16_params.h 226 0x3120 1
+conv2d_dw_bf16_params.h 231 0x3120 2
+conv2d_dw_bf16_params.h 219 0x312a
+conv2d_dw_bf16_params.h 219 0x312a 1
+conv2d_dw_bf16_params.h 220 0x312a 2
+conv2d_dw_bf16_params.h 220 0x312a 3
+conv2d_dw_bf16_params.h 232 0x312a 4
+conv2d_dw_bf16_params.h 234 0x312a 5
+conv2d_dw_bf16_params.h 234 0x312a 6
+conv2d_dw_bf16_params.h 243 0x312a 7
+conv2d_dw_bf16_params.h 250 0x312a 8
+conv2d_dw_bf16_params.h 253 0x312a 9
+conv2d_dw_bf16_params.h 260 0x312a 10
+conv2d_dw_bf16_params.h 264 0x312a 11
+conv2d_dw_bf16_params.h 220 0x3134
+conv2d_dw_bf16_params.h 234 0x3134 1
+conv2d_dw_bf16_params.h 246 0x3134 2
+conv2d_dw_bf16_params.h 253 0x3134 3
+conv2d_dw_bf16_params.h 226 0x313e x
+conv2d_dw_bf16_params.h 234 0x313e 1
+conv2d_dw_bf16_params.h 234 0x313e 2
+conv2d_dw_bf16_params.h 231 0x3148
+conv2d_dw_bf16_params.h 232 0x3148 1
+conv2d_dw_bf16_params.h 232 0x3148 2
+conv2d_dw_bf16_params.h 235 0x3152
+conv2d_dw_bf16_params.h 235 0x3152 1
+conv2d_dw_bf16_params.h 242 0x3152 2
+conv2d_dw_bf16_params.h 242 0x3152 3
+conv2d_dw_bf16_params.h 243 0x3152 4
+conv2d_dw_bf16_params.h 250 0x3152 5
+conv2d_dw_bf16_params.h 255 0x3152 6
+conv2d_dw_bf16_params.h 260 0x3152 7
+conv2d_dw_bf16_params.h 264 0x3152 8
+conv2d_dw_bf16_params.h 234 0x315c
+conv2d_dw_bf16_params.h 239 0x315c 1
+conv2d_dw_bf16_params.h 242 0x315c 2
+conv2d_dw_bf16_params.h 248 0x315c 3
+conv2d_dw_bf16_params.h 253 0x315c 4
+conv2d_dw_bf16_params.h 264 0x315c 5
+conv2d_dw_bf16_params.h 219 0x3166 x
+conv2d_dw_bf16_params.h 219 0x316a
+conv2d_dw_bf16_params.h 219 0x316e
+conv2d_dw_bf16_params.h 220 0x316e 1
+conv2d_dw_bf16_params.h 219 0x3174
+conv2d_dw_bf16_params.h 243 0x3174 1
+conv2d_dw_bf16_params.h 247 0x3174 2
+conv2d_dw_bf16_params.h 220 0x317a x
+conv2d_dw_bf16_params.h 250 0x317a 1
+conv2d_dw_bf16_params.h 219 0x3180 x
+conv2d_dw_bf16_params.h 220 0x3184 x
+conv2d_dw_bf16_params.h 231 0x3184 1
+conv2d_dw_bf16_params.h 219 0x318a x
+conv2d_dw_bf16_params.h 231 0x318a 1 x
+conv2d_dw_bf16_params.h 220 0x3190 x
+conv2d_dw_bf16_params.h 253 0x3190 1 x
+conv2d_dw_bf16_params.h 240 0x3196
+conv2d_dw_bf16_params.h 246 0x3196 1 x
+conv2d_dw_bf16_params.h 232 0x319c x
+conv2d_dw_bf16_params.h 226 0x31a0 x
+conv2d_dw_bf16_params.h 231 0x31a4 x
+conv2d_dw_bf16_params.h 238 0x31a4 1
+conv2d_dw_bf16_params.h 234 0x31aa x
+conv2d_dw_bf16_params.h 231 0x31ae x
+conv2d_dw_bf16_params.h 232 0x31ae 1 x
+conv2d_dw_bf16_params.h 234 0x31b4 x
+conv2d_dw_bf16_params.h 232 0x31b8 x
+conv2d_dw_bf16_params.h 227 0x31bc x
+conv2d_dw_bf16_params.h 232 0x31bc 1
+conv2d_dw_bf16_params.h 234 0x31c2 x
+conv2d_dw_bf16_params.h 235 0x31c2 1 x
+conv2d_dw_bf16_params.h 235 0x31c8
+conv2d_dw_bf16_params.h 243 0x31c8 1 x
+conv2d_dw_bf16_params.h 238 0x31ce x
+conv2d_dw_bf16_params.h 242 0x31ce 1 x
+conv2d_dw_bf16_params.h 242 0x31d4
+conv2d_dw_bf16_params.h 243 0x31d4 1 x
+conv2d_dw_bf16_params.h 239 0x31da x
+conv2d_dw_bf16_params.h 242 0x31da 1 x
+conv2d_dw_bf16_params.h 243 0x31e0 x
+conv2d_dw_bf16_params.h 250 0x31e0 1 x
+conv2d_dw_bf16_params.h 234 0x31e6 x
+conv2d_dw_bf16_params.h 240 0x31e6 1 x
+conv2d_dw_bf16_params.h 253 0x31e6 2 x
+conv2d_dw_bf16_params.h 247 0x31ec x
+conv2d_dw_bf16_params.h 242 0x31f0 x
+conv2d_dw_bf16_params.h 247 0x31f0 1
+conv2d_dw_bf16_params.h 241 0x31f6 x
+conv2d_dw_bf16_params.h 243 0x31f6 1 x
+conv2d_dw_bf16_params.h 243 0x31fc
+conv2d_dw_bf16_params.h 245 0x31fc 1 x
+conv2d_dw_bf16_params.h 243 0x3202 x
+conv2d_dw_bf16_params.h 248 0x3202 1 x
+conv2d_dw_bf16_params.h 245 0x3208 x
+conv2d_dw_bf16_params.h 250 0x3208 1 x
+conv2d_dw_bf16_params.h 246 0x320e x
+conv2d_dw_bf16_params.h 250 0x320e 1
+conv2d_dw_bf16_params.h 247 0x3214 x
+conv2d_dw_bf16_params.h 248 0x3214 1 x
+conv2d_dw_bf16_params.h 250 0x321a x
+conv2d_dw_bf16_params.h 250 0x321a 1 x
+conv2d_dw_bf16_params.h 248 0x3220 x
+conv2d_dw_bf16_params.h 250 0x3220 1
+conv2d_dw_bf16_params.h 249 0x3226 x
+conv2d_dw_bf16_params.h 255 0x3226 1 x
+conv2d_dw_bf16_params.h 258 0x3226 2
+conv2d_dw_bf16_params.h 258 0x3226 3
+conv2d_dw_bf16_params.h 252 0x3230 x
+conv2d_dw_bf16_params.h 253 0x3230 1 x
+conv2d_dw_bf16_params.h 253 0x3236
+conv2d_dw_bf16_params.h 255 0x3236 1 x
+conv2d_dw_bf16_params.h 254 0x323c x
+conv2d_dw_bf16_params.h 255 0x323c 1
+conv2d_dw_bf16_params.h 256 0x323c 2
+conv2d_dw_bf16_params.h 258 0x323c 3 x
+conv2d_dw_bf16_params.h 258 0x323c 4 x
+conv2d_dw_bf16_params.h 259 0x323c 5
+conv2d_dw_bf16_params.h 263 0x323c 6
+conv2d_dw_bf16_params.h 255 0x3248 x
+conv2d_dw_bf16_params.h 256 0x324c x
+conv2d_dw_bf16_params.h 260 0x324c 1 x
+conv2d_dw_bf16_params.h 258 0x3252 x
+conv2d_dw_bf16_params.h 260 0x3252 1
+conv2d_dw_bf16_params.h 259 0x3258 x
+conv2d_dw_bf16_params.h 264 0x3258 1 x
+conv2d_dw_bf16_params.h 260 0x325e x
+conv2d_dw_bf16_params.h 264 0x325e 1
+conv2d_dw_bf16_params.h 262 0x3264 x
+conv2d_dw_bf16_params.h 263 0x3268 x
+conv2d_dw_bf16_params.h 264 0x326c x
+conv2d_dw_bf16_params.h 266 0x3270 x
+conv2d_dw_bf16_params.h 266 0x3280
+conv2d_dw_bf16_params.h 266 0x3280 1
+conv2d_dw_bf16_params.h 266 0x3286
+conv2d_dw_bf16_params.h 266 0x328a
+conv2d_dw_bf16_params.h 266 0x3296
+conv2d_dw_bf16_params.h 266 0x32a0
+conv2d_dw_bf16_params.h 267 0x32a0 1
+conv2d_dw_bf16_params.h 266 0x32aa
+conv2d_dw_bf16_params.h 266 0x32aa 1
+conv2d_dw_bf16_params.h 266 0x32b0
+conv2d_dw_bf16_params.h 266 0x32b6
+conv2d_dw_bf16_params.h 267 0x32bc x
+conv2d_dw_bf16_params.h 266 0x32c6 x
+conv2d_dw_bf16_params.h 266 0x32ca
+conv2d_dw_bf16_params.h 267 0x32ca 1 x
+conv2d_dw_bf16_params.h 266 0x32d0 x
+conv2d_dw_bf16_params.h 266 0x32d8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 444 0x35c0 x
+superkernels.cpp 449 0x35c0 1
+superkernels.cpp 449 0x35c6 x
+superkernels.cpp 444 0x35cc
+superkernels.cpp 467 0x35da
+superkernels.cpp 452 0x35ea
+superkernels.cpp 449 0x35f2
+superkernels.cpp 449 0x35f2 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x35f8
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 446 0x35fc x
+superkernels.cpp 446 0x3600
+superkernels.cpp 446 0x3604
+superkernels.cpp 446 0x360a
+superkernels.cpp 461 0x360e
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x360e 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3618
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x3618 1
+tile.hpp 86 0x3618 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 451 0x3626 x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x3630
+tile.hpp 74 0x3634
+tile.hpp 74 0x3638 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 452 0x3640
+superkernels.cpp 461 0x3640 1
+superkernels.cpp 452 0x3648 x
+superkernels.cpp 453 0x364c
+superkernels.cpp 453 0x364c 1 x
+superkernels.cpp 452 0x365e
+superkernels.cpp 457 0x365e 1
+superkernels.cpp 452 0x3668 x
+superkernels.cpp 453 0x366c x
+superkernels.cpp 457 0x3670
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3680 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x3684
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3684 1
+io_buffer_main.h 218 0x368e
+io_buffer_main.h 218 0x3692
+io_buffer_main.h 235 0x3696 x
+io_buffer_main.h 218 0x36a4 x
+io_buffer_main.h 218 0x36a4 1 x
+io_buffer_main.h 218 0x36a8
+io_buffer_main.h 395 0x36ac
+io_buffer_main.h 395 0x36b6 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 456 0x36ba
+superkernels.cpp 459 0x36ba 1
+superkernels.cpp 464 0x36ba 2
+superkernels.cpp 465 0x36ba 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x36ba 4
+io_buffer_main.h 425 0x36ba 5
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 52 0x36c4
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x36ce
+io_buffer_main.h 324 0x36ce 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x36d4 x
+superkernels.cpp 457 0x36d8
+superkernels.cpp 461 0x36d8 1
+superkernels.cpp 456 0x36e2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x36ec x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 457 0x36f2 x
+superkernels.cpp 456 0x36f6 x
+superkernels.cpp 459 0x36fa x
+superkernels.cpp 461 0x36fe x
+superkernels.cpp 456 0x3704 x
+superkernels.cpp 459 0x3708 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h:
+io_buffer_impl.h 201 0x370c x
+io_buffer_impl.h 52 0x3710 x
+io_buffer_impl.h 52 0x3714
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3720
+io_buffer_main.h 324 0x3724 x
+io_buffer_main.h 425 0x3734 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3738
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3738 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x3742 x
+superkernels.cpp 464 0x3746
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3756 x
+io_buffer_main.h 327 0x375a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 464 0x375e x
+superkernels.cpp 464 0x3762
+superkernels.cpp 465 0x3768
+superkernels.cpp 465 0x3774 x
+superkernels.cpp 467 0x3780
+superkernels.cpp 467 0x378a x
+superkernels.cpp 467 0x378e
+superkernels.cpp 578 0x37a0
+superkernels.cpp 578 0x37a0 1 x
+superkernels.cpp 583 0x37a6
+superkernels.cpp 583 0x37b0 x
+superkernels.cpp 587 0x37c2
+superkernels.cpp 590 0x37c2 1
+superkernels.cpp 599 0x37c2 2
+superkernels.cpp 629 0x37c2 3
+superkernels.cpp 583 0x37d0
+superkernels.cpp 583 0x37d0 1
+superkernels.cpp 580 0x37da x
+superkernels.cpp 580 0x37de
+superkernels.cpp 580 0x37e2
+superkernels.cpp 580 0x37e8
+superkernels.cpp 587 0x37ec
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x37ec 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x37f6
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x37f6 1
+tile.hpp 86 0x37f6 2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 587 0x3802
+superkernels.cpp 587 0x3802 1
+superkernels.cpp 587 0x380c x
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 86 0x3816
+tile.hpp 74 0x381a
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 587 0x3820
+
+/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp:
+tile.hpp 74 0x3820 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 590 0x3830
+superkernels.cpp 591 0x3830 1
+superkernels.cpp 590 0x3836 x
+superkernels.cpp 591 0x3846 x
+superkernels.cpp 591 0x384a
+superkernels.cpp 599 0x3850
+superkernels.cpp 599 0x3854 x
+superkernels.cpp 591 0x385e x
+superkernels.cpp 611 0x386a
+superkernels.cpp 594 0x3874 x
+superkernels.cpp 595 0x387e
+superkernels.cpp 594 0x3884
+superkernels.cpp 594 0x388a
+superkernels.cpp 595 0x38a0 x
+superkernels.cpp 621 0x38aa
+superkernels.cpp 621 0x38c0
+superkernels.cpp 599 0x38d0 x
+superkernels.cpp 600 0x38da
+superkernels.cpp 599 0x38e0
+superkernels.cpp 599 0x38e6
+superkernels.cpp 600 0x38f0 x
+superkernels.cpp 621 0x38fa
+superkernels.cpp 606 0x3904 x
+superkernels.cpp 611 0x3904 1
+superkernels.cpp 611 0x390e x
+superkernels.cpp 607 0x3912 x
+superkernels.cpp 607 0x3916
+superkernels.cpp 607 0x391c
+superkernels.cpp 606 0x3924
+superkernels.cpp 607 0x392a
+superkernels.cpp 606 0x392e x
+superkernels.cpp 611 0x392e 1
+superkernels.cpp 607 0x3938 x
+superkernels.cpp 611 0x393c x
+superkernels.cpp 608 0x3940 x
+superkernels.cpp 608 0x3944
+superkernels.cpp 611 0x3944 1 x
+superkernels.cpp 608 0x3950 x
+superkernels.cpp 614 0x3960
+superkernels.cpp 614 0x3966 x
+superkernels.cpp 616 0x3966 1
+superkernels.cpp 615 0x3970
+superkernels.cpp 616 0x3970 1 x
+superkernels.cpp 615 0x397a x
+superkernels.cpp 618 0x3986 x
+superkernels.cpp 618 0x3986 1 x
+superkernels.cpp 614 0x398c x
+superkernels.cpp 616 0x398c 1 x
+superkernels.cpp 615 0x3992 x
+superkernels.cpp 616 0x3996 x
+superkernels.cpp 615 0x399a x
+superkernels.cpp 614 0x399e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x39a2
+io_buffer_main.h 218 0x39b2 x
+io_buffer_main.h 218 0x39b6
+io_buffer_main.h 218 0x39ba
+io_buffer_main.h 218 0x39be
+io_buffer_main.h 235 0x39c4 x
+io_buffer_main.h 218 0x39d0 x
+io_buffer_main.h 218 0x39d0 1 x
+io_buffer_main.h 218 0x39d4
+io_buffer_main.h 395 0x39d4 1
+io_buffer_main.h 395 0x39e2 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x39f6
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x39f6 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 621 0x39fa
+superkernels.cpp 621 0x39fe x
+superkernels.cpp 621 0x3a04
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3a10
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 623 0x3a20
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a20 1
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 623 0x3a2a x
+superkernels.cpp 623 0x3a2a 1
+superkernels.cpp 623 0x3a34
+superkernels.cpp 623 0x3a44
+superkernels.cpp 623 0x3a48
+superkernels.cpp 629 0x3a58
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a58 1 x
+io_buffer_main.h 395 0x3a58 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a62
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a62 1
+io_buffer_main.h 218 0x3a6c
+io_buffer_main.h 218 0x3a70
+io_buffer_main.h 235 0x3a74 x
+io_buffer_main.h 218 0x3a82 x
+io_buffer_main.h 218 0x3a82 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a86
+superkernels.cpp 630 0x3a86 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 218 0x3a86 2
+io_buffer_main.h 395 0x3a94 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3a9e x
+superkernels.cpp 629 0x3aa2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3aaa x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3ab2 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 125 0x3ab2 1
+io_buffer_main.h 324 0x3ab2 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 630 0x3abc x
+superkernels.cpp 630 0x3ac0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3ac6
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 626 0x3ad0 x
+superkernels.cpp 630 0x3ad8 x
+superkernels.cpp 633 0x3ae8 x
+superkernels.cpp 633 0x3aee
+superkernels.cpp 633 0x3afa
+superkernels.cpp 637 0x3b10 x
+superkernels.cpp 637 0x3b16
+superkernels.cpp 637 0x3b1c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3b30
+io_buffer_main.h 327 0x3b30 1
+io_buffer_main.h 324 0x3b34
+io_buffer_main.h 327 0x3b34 1
+io_buffer_main.h 327 0x3b34 2
+io_buffer_main.h 425 0x3b34 3
+io_buffer_main.h 425 0x3b34 4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 645 0x3b3a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 324 0x3b3a 1 x
+io_buffer_main.h 425 0x3b50 x
+io_buffer_main.h 327 0x3b54 x
+io_buffer_main.h 324 0x3b58 x
+io_buffer_main.h 327 0x3b66 x
+io_buffer_main.h 327 0x3b6a
+io_buffer_main.h 425 0x3b76 x
+io_buffer_main.h 327 0x3b7a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 645 0x3b8c
+superkernels.cpp 649 0x3b8c 1
+superkernels.cpp 645 0x3b90 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 327 0x3b90 1
+io_buffer_main.h 327 0x3b96 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp:
+superkernels.cpp 649 0x3ba0
+superkernels.cpp 648 0x3bb0
+superkernels.cpp 651 0x3bb0 1
+superkernels.cpp 648 0x3bba
+superkernels.cpp 648 0x3bba 1 x
+superkernels.cpp 649 0x3bba 2
+superkernels.cpp 648 0x3bc4
+superkernels.cpp 648 0x3bd4
+superkernels.cpp 648 0x3bd8
+superkernels.cpp 649 0x3bea x
+superkernels.cpp 651 0x3bf4 x
+superkernels.cpp 651 0x3bf8
+superkernels.cpp - 0x3bf9
+
+
+superkernels.cpp:
+File name Line number Starting address View Stmt
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 21 0x3c10 x
+0_0_reloadable5.cc 23 0x3c10 1
+0_0_reloadable5.cc 23 0x3c14 x
+0_0_reloadable5.cc 24 0x3c18 x
+0_0_reloadable5.cc 26 0x3c1c x
+0_0_reloadable5.cc 25 0x3c20 x
+0_0_reloadable5.cc 22 0x3c24 x
+0_0_reloadable5.cc 30 0x3c40 x
+0_0_reloadable5.cc 32 0x3c40 1
+0_0_reloadable5.cc 32 0x3c44 x
+0_0_reloadable5.cc 34 0x3c48 x
+0_0_reloadable5.cc 33 0x3c4c x
+0_0_reloadable5.cc 31 0x3c50 x
+0_0_reloadable5.cc 38 0x3c60 x
+0_0_reloadable5.cc 40 0x3c60 1
+0_0_reloadable5.cc 40 0x3c64 x
+0_0_reloadable5.cc 42 0x3c68 x
+0_0_reloadable5.cc 41 0x3c6c x
+0_0_reloadable5.cc 39 0x3c70 x
+0_0_reloadable5.cc 46 0x3c80 x
+0_0_reloadable5.cc 48 0x3c80 1
+0_0_reloadable5.cc 48 0x3c84 x
+0_0_reloadable5.cc 50 0x3c88 x
+0_0_reloadable5.cc 49 0x3c8c x
+0_0_reloadable5.cc 47 0x3c90 x
+0_0_reloadable5.cc 54 0x3ca0 x
+0_0_reloadable5.cc 56 0x3ca0 1
+0_0_reloadable5.cc 56 0x3ca4 x
+0_0_reloadable5.cc 57 0x3ca8 x
+0_0_reloadable5.cc 59 0x3cac x
+0_0_reloadable5.cc 58 0x3cb0 x
+0_0_reloadable5.cc 55 0x3cb4 x
+0_0_reloadable5.cc 63 0x3cd0 x
+0_0_reloadable5.cc 65 0x3cd0 1
+0_0_reloadable5.cc 65 0x3cd4 x
+0_0_reloadable5.cc 66 0x3cd8 x
+0_0_reloadable5.cc 67 0x3cdc x
+0_0_reloadable5.cc 69 0x3ce0 x
+0_0_reloadable5.cc 68 0x3ce4 x
+0_0_reloadable5.cc 64 0x3ce8 x
+0_0_reloadable5.cc 73 0x3d00 x
+0_0_reloadable5.cc 75 0x3d00 1
+0_0_reloadable5.cc 75 0x3d04 x
+0_0_reloadable5.cc 76 0x3d08 x
+0_0_reloadable5.cc 78 0x3d0c x
+0_0_reloadable5.cc 77 0x3d10 x
+0_0_reloadable5.cc 74 0x3d14 x
+0_0_reloadable5.cc 94 0x930 x
+0_0_reloadable5.cc 96 0x930 1 x
+0_0_reloadable5.cc 96 0x930 2
+0_0_reloadable5.cc 98 0x930 3
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x930 4
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 94 0x936
+0_0_reloadable5.cc 96 0x944
+0_0_reloadable5.cc 98 0x944 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0x944 2
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 96 0x94c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x952
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x958 x
+io_buffer_compiler.h 590 0x95c
+io_buffer_compiler.h 590 0x960
+io_buffer_compiler.h 590 0x964
+io_buffer_compiler.h 590 0x968
+io_buffer_compiler.h 195 0x978 x
+io_buffer_compiler.h 195 0x978 1 x
+io_buffer_compiler.h 194 0x97c x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x980
+io_buffer_main.h 410 0x980 1
+io_buffer_main.h 410 0x98a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 98 0x98e
+0_0_reloadable5.cc 102 0x98e 1
+0_0_reloadable5.cc 98 0x992 x
+0_0_reloadable5.cc 98 0x996
+0_0_reloadable5.cc 98 0x99a
+0_0_reloadable5.cc 98 0x9a8
+0_0_reloadable5.cc 98 0x9ac
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 590 0x9b0 x
+io_buffer_compiler.h 590 0x9b8
+io_buffer_compiler.h 590 0x9bc
+io_buffer_compiler.h 590 0x9c0
+io_buffer_compiler.h 590 0x9c4
+io_buffer_compiler.h 195 0x9d4 x
+io_buffer_compiler.h 195 0x9d4 1 x
+io_buffer_compiler.h 194 0x9d8 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 410 0x9e4 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 102 0x9e8 x
+0_0_reloadable5.cc 102 0x9ec
+0_0_reloadable5.cc 102 0x9f0
+0_0_reloadable5.cc 102 0x9f6
+0_0_reloadable5.cc 102 0xa08
+0_0_reloadable5.cc 105 0xa0c
+0_0_reloadable5.cc 107 0xa0c 1
+0_0_reloadable5.cc 105 0xa20 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa20 1
+io_buffer_compiler.h 606 0xa20 2
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa20 3
+io_buffer_main.h 440 0xa20 4
+io_buffer_main.h 440 0xa26
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 107 0xa2a
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa2e
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa2e 1
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 605 0xa38 x
+io_buffer_compiler.h 605 0xa3c
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa4a
+io_buffer_main.h 440 0xa4e x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa52
+io_buffer_compiler.h 606 0xa52 1 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 107 0xa58 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa66 x
+io_buffer_compiler.h 605 0xa6a x
+io_buffer_compiler.h 606 0xa6a 1
+io_buffer_compiler.h 605 0xa70
+io_buffer_compiler.h 606 0xa70 1 x
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h:
+io_buffer_main.h 440 0xa82 x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 110 0xa86
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xa8a x
+
+/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc:
+0_0_reloadable5.cc 110 0xa96 x
+0_0_reloadable5.cc 110 0xaa0
+
+/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h:
+io_buffer_compiler.h 606 0xaa4
+io_buffer_compiler.h 606 0xaa8 x
+io_buffer_compiler.h 606 0xaac
+io_buffer_compiler.h 606 0xab0
+io_buffer_compiler.h - 0xab1
+
+
+CU: me_div.c:
+File name Line number Starting address View Stmt
+
+./me_div.c:[++]
+me_div.c 108 0x3d30
+me_div.c 108 0x3d30 1
+me_div.c 115 0x3d30 2 x
+me_div.c 108 0x3d36
+me_div.c 108 0x3d3a
+me_div.c 108 0x3d3e
+me_div.c 108 0x3d42
+me_div.c 108 0x3d46
+me_div.c 108 0x3d4a
+me_div.c 108 0x3d4e
+me_div.c 108 0x3d52
+me_div.c 108 0x3d56
+me_div.c 108 0x3d5a
+me_div.c 108 0x3d5e
+me_div.c 108 0x3d62
+me_div.c 108 0x3d66
+me_div.c 108 0x3d6a
+me_div.c 108 0x3d6e
+me_div.c 108 0x3d72
+me_div.c 108 0x3d76
+me_div.c 108 0x3d7a
+me_div.c 108 0x3d7e
+me_div.c 108 0x3d82
+me_div.c 108 0x3d86
+me_div.c 108 0x3d8a
+me_div.c 108 0x3d8e
+me_div.c 108 0x3d92
+me_div.c 108 0x3d96
+me_div.c 108 0x3d9a
+me_div.c 108 0x3d9e
+me_div.c 108 0x3da2
+me_div.c 119 0x3da6 x
+me_div.c 108 0x3daa x
+me_div.c 108 0x3dae
+me_div.c 108 0x3db2
+me_div.c 108 0x3db6
+me_div.c - 0x3db7
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
+CU: No directory table
+CU: Empty file name table
+ - 0x1
+
+
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.bcf
@@ -0,0 +1,16 @@
+_reserved DMb 0x0 0x40000
+
+_reserved PM 0x0 0x930 //reserved for main elf
+
+_entry_point _Z13kernelWrapperPPvjjjj
+_symbol _Z13kernelWrapperPPvjjjj 0x930
+
+_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers
+_reserved DMb 0x7ba80 0x40 //reserved for sync buffer
+_stack DM_stack 0x7bac0 0x940 //stack for core
+_reserved DMb 0x7c400 0x40 //reserved for main elf heap
+//space for synopsys compiler at 0x7c440 0x880//heap
+_reserved DMb 0x40000 0x3b280
+
+_reserved DMb 0x7ccc0 0x3340
+_reserved DMb 0x80000 0x80000 // And everything else the core can't see
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.prx
new file mode 100644
index 0000000000000000000000000000000000000000..7885e65d3605a73d5a87e42291cb7013c3896748
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/scripts/3_3_reloadable15.prx
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/src/3_3_reloadable15.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/src/3_3_reloadable15.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ed266062f542d5fd9e7d7b554216254c298b9574
--- /dev/null
+++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/3_3_reloadable15/src/3_3_reloadable15.cc
@@ -0,0 +1,110 @@
+// Automatically generated processor driver using AIEngine tool-chain
+
+#include
+#include
+#include
+
+
+// Declare Kernel functions and initializers
+void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict);
+void superkernel_conv_eltbinary(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict,const unsigned int (&)[17],adf::io_buffer